# Policy Gradient

## 基于时序差分的行动-批判算法

In [1]:
import gym
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import rl_utils

In [2]:
class PolicyNet(torch.nn.Module):
    """Two-layer MLP policy producing a categorical distribution over actions.

    Args:
        state_dim: Size of the state feature vector (input width of ``fc1``).
        hidden_dim: Width of the single hidden layer.
        action_dim: Number of discrete actions (output width of ``fc2``).
    """

    def __init__(self, state_dim, hidden_dim, action_dim):
        super().__init__()
        self.fc1 = torch.nn.Linear(state_dim, hidden_dim)
        self.fc2 = torch.nn.Linear(hidden_dim, action_dim)

    def forward(self, x):
        """Return action probabilities for the given state(s).

        Args:
            x: Float tensor whose last dimension is ``state_dim``. Any number
                of leading (batch) dimensions is accepted, including none.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``action_dim``; values along the last axis sum to 1.
        """
        hidden = F.relu(self.fc1(x))
        # Normalise over the last (action) axis rather than a hard-coded
        # dim=1: identical for the usual (batch, state_dim) input, but also
        # correct for an unbatched 1-D state or extra leading dimensions.
        return F.softmax(self.fc2(hidden), dim=-1)

In [3]:
class ValueNet(torch.nn.Module):
    """Two-layer MLP critic that maps a state to a single scalar value.

    Args:
        state_dim: Size of the state feature vector (input width of ``fc1``).
        hidden_dim: Width of the single hidden layer.
    """

    def __init__(self, state_dim, hidden_dim):
        super().__init__()
        # Layers are created in input-to-output order: state -> hidden -> 1.
        self.fc1 = torch.nn.Linear(in_features=state_dim,
                                   out_features=hidden_dim)
        self.fc2 = torch.nn.Linear(in_features=hidden_dim, out_features=1)

    def forward(self, x):
        """Return the value estimate for ``x``.

        The output has the same leading dimensions as ``x`` and a trailing
        dimension of size 1 (the output width of ``fc2``).
        """
        hidden_activation = torch.relu(self.fc1(x))
        value = self.fc2(hidden_activation)
        return value

In [4]:
class ActorCritic:
    """One-step temporal-difference actor-critic agent for discrete actions.

    The actor (``PolicyNet``) outputs action probabilities; the critic
    (``ValueNet``) estimates state values. Each network has its own Adam
    optimizer.

    Args:
        state_dim: Size of the state feature vector.
        hidden_dim: Hidden-layer width used by both networks.
        action_dim: Number of discrete actions.
        actor_lr: Learning rate of the actor optimizer.
        critic_lr: Learning rate of the critic optimizer.
        gamma: Discount factor applied to the bootstrapped next-state value.
        device: ``torch.device`` on which networks and tensors are placed.
    """

    def __init__(self, state_dim, hidden_dim, action_dim, actor_lr, critic_lr,
                 gamma, device):
        # Policy network (actor).
        self.actor = PolicyNet(state_dim, hidden_dim, action_dim).to(device)
        # Value network (critic).
        self.critic = ValueNet(state_dim, hidden_dim).to(device)
        # Optimizer for the policy network.
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                lr=actor_lr)
        # Optimizer for the value network.
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=critic_lr)
        self.gamma = gamma
        self.device = device

    def _to_tensor(self, values, dtype=torch.float):
        """Convert a (possibly nested) sequence to a tensor on ``self.device``.

        The data is first stacked into one contiguous ``np.ndarray``: calling
        ``torch.tensor`` directly on a Python list of ndarrays is very slow
        and makes PyTorch emit a UserWarning.
        """
        return torch.tensor(np.array(values), dtype=dtype).to(self.device)

    def take_action(self, state):
        """Sample an action index from the current policy.

        Args:
            state: A single observation (array-like of length ``state_dim``).

        Returns:
            The sampled action as a Python ``int``.
        """
        batch = self._to_tensor([state])  # add a leading batch dimension
        # Acting never back-propagates, so skip building the autograd graph.
        with torch.no_grad():
            probs = self.actor(batch)
        action_dist = torch.distributions.Categorical(probs)
        return action_dist.sample().item()

    def update(self, transition_dict):
        """Run one gradient step on both networks from a batch of transitions.

        Args:
            transition_dict: Mapping with the keys ``'states'``,
                ``'actions'``, ``'rewards'``, ``'next_states'`` and
                ``'dones'``, each a sequence with one entry per transition.
                ``dones`` entries that evaluate to 1 disable bootstrapping
                from the corresponding next state.
        """
        states = self._to_tensor(transition_dict['states'])
        # gather() needs int64 indices shaped (batch, 1).
        actions = self._to_tensor(transition_dict['actions'],
                                  dtype=torch.int64).view(-1, 1)
        rewards = self._to_tensor(transition_dict['rewards']).view(-1, 1)
        next_states = self._to_tensor(transition_dict['next_states'])
        dones = self._to_tensor(transition_dict['dones']).view(-1, 1)

        # Single critic pass over the current states, reused by both losses.
        values = self.critic(states)
        # TD target. It is treated as a constant by both losses, so it is
        # computed without gradient tracking.
        with torch.no_grad():
            td_target = rewards + self.gamma * self.critic(next_states) * (
                1 - dones)
        # TD error; used as the (detached) advantage weight for the actor.
        td_delta = td_target - values
        log_probs = torch.log(self.actor(states).gather(1, actions))
        actor_loss = torch.mean(-log_probs * td_delta.detach())
        # Mean-squared-error loss (mse_loss already averages over the batch).
        critic_loss = F.mse_loss(values, td_target)

        self.actor_optimizer.zero_grad()
        self.critic_optimizer.zero_grad()
        actor_loss.backward()  # gradients of the policy network
        critic_loss.backward()  # gradients of the value network
        self.actor_optimizer.step()  # update policy-network parameters
        self.critic_optimizer.step()  # update value-network parameters

In [5]:
# Hyperparameters for the actor-critic run.
actor_lr = 1e-3  # learning rate of the policy network
critic_lr = 1e-2  # learning rate of the value network
num_episodes = 1000
hidden_dim = 128
gamma = 0.98  # discount factor
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Environment setup; the environment and PyTorch are both seeded with 0
# before the agent's networks are created.
env_name = 'CartPole-v1'
env = gym.make(env_name)
env.reset(seed=0)
torch.manual_seed(0)

# Network sizes are read from the environment's observation/action spaces.
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n
agent = ActorCritic(
    state_dim=state_dim,
    hidden_dim=hidden_dim,
    action_dim=action_dim,
    actor_lr=actor_lr,
    critic_lr=critic_lr,
    gamma=gamma,
    device=device,
)

# Train on-policy and keep the value returned by the helper.
return_list = rl_utils.train_on_policy_agent(env, agent, num_episodes)

  state = torch.tensor([state], dtype=torch.float).to(self.device)
  if not isinstance(terminated, (bool, np.bool8)):
Iteration 0:   1%|          | 1/100 [00:00<00:16,  5.94it/s]

(array([0.03132702, 0.04127556, 0.01066358, 0.02294966], dtype=float32), {})
[array([ 0.0043625 ,  0.04350724,  0.03158535, -0.04972615], dtype=float32)]
[array([ 0.00523264,  0.23816238,  0.03059083, -0.33227867], dtype=float32)]
[array([ 0.00999589,  0.43283588,  0.02394526, -0.61516   ], dtype=float32)]
[array([ 0.01865261,  0.6276152 ,  0.01164206, -0.90020597], dtype=float32)]
[array([ 0.03120491,  0.43233743, -0.00636206, -0.60388654], dtype=float32)]
[array([ 0.03985166,  0.6275478 , -0.01843979, -0.89856654], dtype=float32)]
[array([ 0.05240262,  0.43268055, -0.03641112, -0.61173636], dtype=float32)]
[array([ 0.06105623,  0.23808591, -0.04864585, -0.3307404 ], dtype=float32)]
[array([ 0.06581794,  0.43386537, -0.05526066, -0.63835835], dtype=float32)]
[array([ 0.07449526,  0.6297125 , -0.06802782, -0.9479191 ], dtype=float32)]
[array([ 0.08708951,  0.43556926, -0.08698621, -0.6773625 ], dtype=float32)]
[array([ 0.09580089,  0.24175666, -0.10053346, -0.4132845 ], dtype=float32)]

Iteration 0:  15%|█▌        | 15/100 [00:00<00:01, 43.75it/s, episode=10, return=13.700]

[array([-0.10630573, -0.96221286,  0.13547166,  1.6493179 ], dtype=float32)]
[array([-0.12554999, -0.76890963,  0.16845801,  1.4017267 ], dtype=float32)]
[array([-0.14092818, -0.96567595,  0.19649254,  1.7419909 ], dtype=float32)]
(array([-0.00141646,  0.03894878,  0.04340435, -0.01422048], dtype=float32), {})
[array([ 0.00715298, -0.01781306,  0.00943   , -0.01620888], dtype=float32)]
[array([ 0.00679672, -0.21306898,  0.00910583,  0.27943435], dtype=float32)]
[array([ 0.00253534, -0.01807809,  0.01469451, -0.01036277], dtype=float32)]
[array([ 0.00217378, -0.21340767,  0.01448726,  0.28691998], dtype=float32)]
[array([-0.00209437, -0.4087332 ,  0.02022566,  0.58413666], dtype=float32)]
[array([-0.01026904, -0.60413253,  0.03190839,  0.88312167], dtype=float32)]
[array([-0.02235169, -0.79967296,  0.04957082,  1.1856625 ], dtype=float32)]
[array([-0.03834515, -0.99540156,  0.07328407,  1.4934626 ], dtype=float32)]
[array([-0.05825318, -0.80124366,  0.10315333,  1.2245337 ], dtype=float

Iteration 0:  20%|██        | 20/100 [00:00<00:01, 43.68it/s, episode=20, return=19.800]

[array([-0.08177979, -0.41403946,  0.07136243,  0.6501328 ], dtype=float32)]
[array([-0.09006057, -0.61007905,  0.08436508,  0.9644065 ], dtype=float32)]
[array([-0.10226215, -0.41618556,  0.10365321,  0.69937456], dtype=float32)]
[array([-0.11058587, -0.6125802 ,  0.1176407 ,  1.022805  ], dtype=float32)]
[array([-0.12283747, -0.8090559 ,  0.13809681,  1.3499882 ], dtype=float32)]
[array([-0.1390186 , -1.0056161 ,  0.16509657,  1.6824929 ], dtype=float32)]
[array([-0.1591309 , -1.2022204 ,  0.19874643,  2.0217078 ], dtype=float32)]
(array([ 0.03097108,  0.0060476 , -0.02115788, -0.00871037], dtype=float32), {})
[array([ 0.0318121 ,  0.01265065,  0.04590777, -0.01305956], dtype=float32)]
[array([ 0.03206511,  0.20708518,  0.04564657, -0.29091194], dtype=float32)]
[array([ 0.03620682,  0.40152755,  0.03982833, -0.56885624], dtype=float32)]
[array([ 0.04423736,  0.20587027,  0.02845121, -0.26389664], dtype=float32)]
[array([0.04835477, 0.01035402, 0.02317328, 0.03762256], dtype=float32)]

Iteration 0:  30%|███       | 30/100 [00:00<00:01, 42.82it/s, episode=30, return=16.900]

[array([-0.0862795 , -1.0106431 ,  0.10958979,  1.5232773 ], dtype=float32)]
[array([-0.10649236, -1.2069048 ,  0.14005534,  1.8480587 ], dtype=float32)]
[array([-0.13063045, -1.403264  ,  0.17701651,  2.1807554 ], dtype=float32)]
(array([ 0.00776879,  0.01022392,  0.04624231, -0.04277347], dtype=float32), {})
[array([-2.7176341e-06,  2.4409749e-02, -3.2277327e-02, -1.1193327e-02],
      dtype=float32)]
[array([ 0.00048548,  0.21997938, -0.03250119, -0.31388286], dtype=float32)]
[array([ 0.00488506,  0.4155489 , -0.03877885, -0.61663586], dtype=float32)]
[array([ 0.01319604,  0.6111905 , -0.05111157, -0.9212758 ], dtype=float32)]
[array([ 0.02541985,  0.41679513, -0.06953708, -0.6450838 ], dtype=float32)]
[array([ 0.03375576,  0.6128137 , -0.08243876, -0.95882845], dtype=float32)]
[array([ 0.04601203,  0.41889104, -0.10161532, -0.69314194], dtype=float32)]
[array([ 0.05438985,  0.22531459, -0.11547817, -0.43409857], dtype=float32)]
[array([ 0.05889614,  0.42186585, -0.12416014, -0.7608

Iteration 0:  41%|████      | 41/100 [00:00<00:01, 47.43it/s, episode=40, return=21.300]

[array([ 0.00087459, -0.17034562,  0.04133606,  0.30306214], dtype=float32)]
[array([-0.00253232,  0.02416359,  0.0473973 ,  0.02369691], dtype=float32)]
[array([-0.00204905,  0.21857491,  0.04787124, -0.25366297], dtype=float32)]
[array([0.00232245, 0.02280327, 0.04279798, 0.05372684], dtype=float32)]
[array([ 0.00277851,  0.21728624,  0.04387252, -0.22515175], dtype=float32)]
[array([ 0.00712424,  0.4117546 ,  0.03936948, -0.5036792 ], dtype=float32)]
[array([ 0.01535933,  0.21610054,  0.0292959 , -0.1988539 ], dtype=float32)]
[array([0.01968134, 0.02057209, 0.02531882, 0.1029247 ], dtype=float32)]
[array([ 0.02009278, -0.17490338,  0.02737732,  0.40348688], dtype=float32)]
[array([ 0.01659472, -0.3704027 ,  0.03544705,  0.7046739 ], dtype=float32)]
[array([ 0.00918666, -0.1757894 ,  0.04954053,  0.42335656], dtype=float32)]
[array([ 0.00567087, -0.37157688,  0.05800766,  0.73123646], dtype=float32)]
[array([-0.00176066, -0.56745046,  0.07263239,  1.0415974 ], dtype=float32)]
[array(

Iteration 0:  56%|█████▌    | 56/100 [00:01<00:00, 50.93it/s, episode=50, return=14.100]

[array([-0.14604029, -0.99600923,  0.15782638,  1.5974452 ], dtype=float32)]
[array([-0.16596048, -1.1926105 ,  0.18977529,  1.9348906 ], dtype=float32)]
(array([-0.04593824, -0.03059725,  0.04450246, -0.03374303], dtype=float32), {})
[array([ 0.03520523,  0.03221372, -0.01087062, -0.00332165], dtype=float32)]
[array([ 0.03584951, -0.16275066, -0.01093706,  0.28591174], dtype=float32)]
[array([ 0.03259449, -0.35771495, -0.00521882,  0.5751252 ], dtype=float32)]
[array([ 0.0254402 , -0.55276334,  0.00628368,  0.8661595 ], dtype=float32)]
[array([ 0.01438493, -0.35772747,  0.02360687,  0.5754589 ], dtype=float32)]
[array([ 0.00723038, -0.16294426,  0.03511605,  0.29030526], dtype=float32)]
[array([ 0.00397149, -0.35854888,  0.04092215,  0.5938533 ], dtype=float32)]
[array([-0.00319948, -0.554219  ,  0.05279922,  0.89914054], dtype=float32)]
[array([-0.01428386, -0.75001526,  0.07078204,  1.2079414 ], dtype=float32)]
[array([-0.02928417, -0.5558754 ,  0.09494086,  0.93825334], dtype=float

Iteration 0:  71%|███████   | 71/100 [00:01<00:00, 61.62it/s, episode=70, return=11.600]

[array([ 0.03073068, -0.00896361, -0.01428364, -0.01238194], dtype=float32)]
[array([ 0.03055141,  0.18636024, -0.01453127, -0.30953708], dtype=float32)]
[array([ 0.03427861,  0.38168618, -0.02072201, -0.6067671 ], dtype=float32)]
[array([ 0.04191234,  0.18686001, -0.03285736, -0.32068244], dtype=float32)]
[array([ 0.04564954,  0.3824341 , -0.03927101, -0.62354356], dtype=float32)]
[array([ 0.05329822,  0.5780817 , -0.05174188, -0.92833143], dtype=float32)]
[array([ 0.06485985,  0.7738626 , -0.07030851, -1.2368151 ], dtype=float32)]
[array([ 0.08033711,  0.96981394, -0.09504481, -1.5506696 ], dtype=float32)]
[array([ 0.09973338,  1.165939  , -0.12605819, -1.8714304 ], dtype=float32)]
[array([ 0.12305216,  1.3621931 , -0.16348681, -2.2004387 ], dtype=float32)]
[array([ 0.15029603,  1.1689799 , -0.20749559, -1.9623348 ], dtype=float32)]
(array([ 0.03947285, -0.04517344, -0.0301776 ,  0.01362837], dtype=float32), {})
[array([ 0.02888452,  0.01066925, -0.03084108, -0.03823584], dtype=float

Iteration 0:  79%|███████▉  | 79/100 [00:01<00:00, 62.35it/s, episode=80, return=21.000]

[array([ 0.06545982,  0.7731332 , -0.04610662, -1.1808038 ], dtype=float32)]
[array([ 0.08092248,  0.96882236, -0.0697227 , -1.4875761 ], dtype=float32)]
[array([ 0.10029893,  0.77461565, -0.09947422, -1.2174557 ], dtype=float32)]
[array([ 0.11579124,  0.9708696 , -0.12382334, -1.5395786 ], dtype=float32)]
[array([ 0.13520864,  1.1672444 , -0.15461491, -1.8681967 ], dtype=float32)]
[array([ 0.15855353,  1.3636833 , -0.19197884, -2.204616  ], dtype=float32)]
(array([ 0.01641419,  0.01602765, -0.0415241 ,  0.00819026], dtype=float32), {})
[array([ 0.02359236,  0.02955684,  0.00885343, -0.03694269], dtype=float32)]
[array([ 0.0241835 , -0.16569094,  0.00811457,  0.25852036], dtype=float32)]
[array([ 0.02086968, -0.3609278 ,  0.01328498,  0.55375165], dtype=float32)]
[array([ 0.01365112, -0.1659949 ,  0.02436001,  0.2652838 ], dtype=float32)]
[array([ 0.01033122, -0.36145592,  0.02966569,  0.56554943], dtype=float32)]
[array([ 0.00310211, -0.5569812 ,  0.04097668,  0.8674288 ], dtype=float

Iteration 0:  92%|█████████▏| 92/100 [00:01<00:00, 58.06it/s, episode=90, return=19.700]

(array([-0.02056362, -0.00800812,  0.04622614, -0.00411394], dtype=float32), {})
[array([ 0.0450135 , -0.04694679, -0.04338897, -0.04721841], dtype=float32)]
[array([ 0.04407457,  0.1487696 , -0.04433334, -0.35326907], dtype=float32)]
[array([ 0.04704996, -0.04569482, -0.05139872, -0.07488876], dtype=float32)]
[array([ 0.04613606, -0.24004368, -0.0528965 ,  0.20114496], dtype=float32)]
[array([ 0.04133519, -0.4343708 , -0.0488736 ,  0.4766836 ], dtype=float32)]
[array([ 0.03264777, -0.6287698 , -0.03933993,  0.7535708 ], dtype=float32)]
[array([ 0.02007238, -0.43312818, -0.02426851,  0.44877246], dtype=float32)]
[array([ 0.01140981, -0.6278986 , -0.01529306,  0.73370785], dtype=float32)]
[array([-1.1481600e-03, -8.2280594e-01, -6.1890582e-04,  1.0215387e+00],
      dtype=float32)]
[array([-0.01760428, -0.6276758 ,  0.01981187,  0.7286615 ], dtype=float32)]
[array([-0.0301578, -0.4328332,  0.0343851,  0.4422793], dtype=float32)]
[array([-0.03881446, -0.23821427,  0.04323068,  0.1606307 

Iteration 0: 100%|██████████| 100/100 [00:01<00:00, 51.55it/s, episode=100, return=24.500]


[array([ 0.00281162,  0.3531076 ,  0.01929427, -0.48591068], dtype=float32)]
[array([ 0.00987377,  0.54795206,  0.00957606, -0.7724508 ], dtype=float32)]
[array([ 0.02083281,  0.35269967, -0.00587296, -0.4767703 ], dtype=float32)]
[array([ 0.02788681,  0.15766113, -0.01540837, -0.1859442 ], dtype=float32)]
[array([ 0.03104003, -0.03723702, -0.01912725,  0.10183842], dtype=float32)]
[array([ 0.03029529,  0.15815376, -0.01709048, -0.19681719], dtype=float32)]
[array([ 0.03345836,  0.35351595, -0.02102683, -0.49484202], dtype=float32)]
[array([ 0.04052868,  0.548928  , -0.03092367, -0.7940768 ], dtype=float32)]
[array([ 0.05150724,  0.35424387, -0.0468052 , -0.5112803 ], dtype=float32)]
[array([ 0.05859212,  0.1598114 , -0.05703081, -0.2337067 ], dtype=float32)]
[array([ 0.06178835,  0.35569987, -0.06170494, -0.54381967], dtype=float32)]
[array([ 0.06890234,  0.16149686, -0.07258134, -0.2711993 ], dtype=float32)]
[array([ 0.07213228, -0.03251836, -0.07800532, -0.00226285], dtype=float32)]

Iteration 1:   0%|          | 0/100 [00:00<?, ?it/s]

(array([ 0.0258999 , -0.04246113, -0.01205135, -0.01731522], dtype=float32), {})
[array([ 0.00704313,  0.01530532, -0.03186122, -0.00303406], dtype=float32)]
[array([ 0.00734924,  0.21086937, -0.0319219 , -0.3055969 ], dtype=float32)]
[array([ 0.01156663,  0.01621652, -0.03803384, -0.02314979], dtype=float32)]
[array([ 0.01189096, -0.17833993, -0.03849684,  0.2572946 ], dtype=float32)]
[array([ 0.00832416, -0.37289172, -0.03335094,  0.5375908 ], dtype=float32)]
[array([ 0.00086632, -0.5675293 , -0.02259913,  0.81958145], dtype=float32)]
[array([-0.01048426, -0.37210545, -0.0062075 ,  0.51987696], dtype=float32)]
[array([-0.01792637, -0.17689668,  0.00419004,  0.22524442], dtype=float32)]
[array([-0.0214643 , -0.37207827,  0.00869493,  0.5192461 ], dtype=float32)]
[array([-0.02890587, -0.56732154,  0.01907985,  0.8146562 ], dtype=float32)]
[array([-0.0402523 , -0.7626995 ,  0.03537297,  1.113279  ], dtype=float32)]
[array([-0.05550629, -0.9582677 ,  0.05763855,  1.4168452 ], dtype=float

Iteration 1:   5%|▌         | 5/100 [00:00<00:02, 40.97it/s]

[array([-0.09303999, -1.4035708 ,  0.03259615,  1.8773017 ], dtype=float32)]
[array([-0.12111141, -1.208819  ,  0.07014219,  1.5949109 ], dtype=float32)]
[array([-0.14528778, -1.4046993 ,  0.1020404 ,  1.9086137 ], dtype=float32)]
[array([-0.17338178, -1.2108158 ,  0.14021267,  1.6492491 ], dtype=float32)]
[array([-0.1975981 , -1.4072708 ,  0.17319766,  1.9821271 ], dtype=float32)]
(array([-0.04845868,  0.00641321, -0.03087332,  0.02666607], dtype=float32), {})
[array([-0.00206243,  0.00490799, -0.02065917, -0.00434215], dtype=float32)]
[array([-0.00196427,  0.20032004, -0.02074602, -0.30347103], dtype=float32)]
[array([ 0.00204213,  0.39573142, -0.02681544, -0.60262394], dtype=float32)]
[array([ 0.00995676,  0.2009946 , -0.03886792, -0.3185065 ], dtype=float32)]
[array([ 0.01397665,  0.00644716, -0.04523805, -0.03833003], dtype=float32)]
[array([ 0.01410559,  0.20218764, -0.04600465, -0.34493583], dtype=float32)]
[array([ 0.01814935,  0.39793283, -0.05290337, -0.6517635 ], dtype=float

Iteration 1:  10%|█         | 10/100 [00:00<00:02, 35.38it/s, episode=110, return=27.400]

[array([-0.13576156, -0.4505219 ,  0.17176978,  0.86877406], dtype=float32)]
[array([-0.144772  , -0.2581007 ,  0.18914527,  0.6346421 ], dtype=float32)]
[array([-0.14993401, -0.06605001,  0.2018381 ,  0.40698162], dtype=float32)]
(array([ 0.03942608,  0.03598475,  0.00337521, -0.01225558], dtype=float32), {})
[array([0.02129895, 0.02093697, 0.01822829, 0.03424241], dtype=float32)]
[array([ 0.02171769,  0.21579285,  0.01891314, -0.25263402], dtype=float32)]
[array([ 0.02603355,  0.4106397 ,  0.01386046, -0.5392919 ], dtype=float32)]
[array([ 0.03424634,  0.21532567,  0.00307462, -0.24227417], dtype=float32)]
[array([ 0.03855286,  0.02015993, -0.00177086,  0.05137699], dtype=float32)]
[array([ 0.03895606, -0.17493658, -0.00074332,  0.34350067], dtype=float32)]
[array([0.03545732, 0.02019594, 0.00612669, 0.05058344], dtype=float32)]
[array([ 0.03586124,  0.21522951,  0.00713836, -0.24016018], dtype=float32)]
[array([ 0.04016583,  0.41024876,  0.00233515, -0.53058296], dtype=float32)]
[ar

Iteration 1:  14%|█▍        | 14/100 [00:00<00:02, 36.98it/s, episode=110, return=27.400]

[array([-0.06570426, -0.75465065,  0.10042407,  1.3186486 ], dtype=float32)]
[array([-0.08079728, -0.5609317 ,  0.12679705,  1.0590092 ], dtype=float32)]
[array([-0.09201591, -0.3676965 ,  0.14797723,  0.8086618 ], dtype=float32)]
[array([-0.09936985, -0.17487852,  0.16415046,  0.56594247], dtype=float32)]
[array([-0.10286741,  0.01760629,  0.17546932,  0.32914028], dtype=float32)]
[array([-0.10251529,  0.20985286,  0.18205212,  0.09652207], dtype=float32)]
[array([-0.09831823,  0.01265193,  0.18398257,  0.44066328], dtype=float32)]
[array([-0.09806519,  0.20475864,  0.19279583,  0.21115275], dtype=float32)]
[array([-0.09397002,  0.39667642,  0.19701888, -0.01505878], dtype=float32)]
[array([-0.08603649,  0.5885076 ,  0.19671771, -0.23968938], dtype=float32)]
[array([-0.07426634,  0.3911991 ,  0.19192392,  0.10802792], dtype=float32)]
[array([-0.06644236,  0.5831269 ,  0.19408448, -0.11849792], dtype=float32)]
[array([-0.05477982,  0.7750151 ,  0.19171453, -0.34422284], dtype=float32)]

Iteration 1:  19%|█▉        | 19/100 [00:00<00:01, 41.04it/s, episode=110, return=27.400]

[array([-0.0345669 ,  0.03340212,  0.02204057, -0.02307206], dtype=float32)]
[array([-0.03389886,  0.22820117,  0.02157913, -0.30872026], dtype=float32)]
[array([-0.02933484,  0.4230091 ,  0.01540473, -0.5945204 ], dtype=float32)]
[array([-0.02087465,  0.22767496,  0.00351432, -0.2970251 ], dtype=float32)]
[array([-0.01632115,  0.42274663, -0.00242618, -0.5885976 ], dtype=float32)]
[array([-0.00786622,  0.61790246, -0.01419814, -0.88204384], dtype=float32)]
[array([ 0.00449183,  0.42297623, -0.03183901, -0.593858  ], dtype=float32)]
[array([ 0.01295135,  0.618529  , -0.04371617, -0.89639765], dtype=float32)]
[array([ 0.02532193,  0.42402616, -0.06164413, -0.61777055], dtype=float32)]
[array([ 0.03380246,  0.22981703, -0.07399954, -0.34512207], dtype=float32)]
[array([ 0.03839879,  0.42590934, -0.08090198, -0.6601916 ], dtype=float32)]
[array([ 0.04691698,  0.2320009 , -0.09410581, -0.39403948], dtype=float32)]
[array([ 0.051557  ,  0.0383315 , -0.1019866 , -0.13244708], dtype=float32)]

Iteration 1:  19%|█▉        | 19/100 [00:00<00:01, 41.04it/s, episode=120, return=33.900]

[array([ 0.05307733, -0.187679  , -0.00837966,  0.20399986], dtype=float32)]
[array([ 0.04932375,  0.00756178, -0.00429966, -0.09131461], dtype=float32)]
[array([ 0.04947499, -0.18749827, -0.00612595,  0.20000868], dtype=float32)]
[array([ 0.04572502,  0.00771075, -0.00212578, -0.09460039], dtype=float32)]
[array([ 0.04587924,  0.20286311, -0.00401779, -0.38795325], dtype=float32)]
[array([ 0.0499365 ,  0.39804184, -0.01177685, -0.6819002 ], dtype=float32)]
[array([ 0.05789734,  0.5933254 , -0.02541485, -0.9782675 ], dtype=float32)]
[array([ 0.06976385,  0.39855322, -0.04498021, -0.6936748 ], dtype=float32)]
[array([ 0.07773491,  0.20408314, -0.0588537 , -0.41548443], dtype=float32)]
[array([ 0.08181657,  0.00984256, -0.06716339, -0.14192101], dtype=float32)]
[array([ 0.08201342, -0.18425642, -0.07000181,  0.12883982], dtype=float32)]
[array([ 0.0783283 , -0.37830937, -0.06742501,  0.39864266], dtype=float32)]
[array([ 0.07076211, -0.18229893, -0.05945216,  0.08548711], dtype=float32)]

Iteration 1:  24%|██▍       | 24/100 [00:00<00:02, 36.23it/s, episode=120, return=33.900]

[array([-0.06052458, -0.3575568 ,  0.02325657,  0.461777  ], dtype=float32)]
[array([-0.06767572, -0.1627711 ,  0.03249211,  0.17651433], dtype=float32)]
[array([-0.07093114,  0.03187114,  0.03602239, -0.10574395], dtype=float32)]
[array([-0.07029372, -0.16374803,  0.03390752,  0.19808273], dtype=float32)]
[array([-0.07356868,  0.03087295,  0.03786917, -0.08371403], dtype=float32)]
[array([-0.07295122,  0.22543217,  0.03619489, -0.36421275], dtype=float32)]
[array([-0.06844258,  0.02981502,  0.02891064, -0.06034023], dtype=float32)]
[array([-0.06784628,  0.22451077,  0.02770383, -0.3437633 ], dtype=float32)]
[array([-0.06335606,  0.02900589,  0.02082856, -0.04247459], dtype=float32)]
[array([-0.06277594, -0.16640845,  0.01997907,  0.25670648], dtype=float32)]
[array([-0.06610411,  0.02842264,  0.0251132 , -0.02960837], dtype=float32)]
[array([-0.06553566, -0.16705027,  0.02452103,  0.27089107], dtype=float32)]
[array([-0.06887666,  0.02771333,  0.02993886, -0.01395807], dtype=float32)]

Iteration 1:  28%|██▊       | 28/100 [00:00<00:02, 35.46it/s, episode=120, return=33.900]

[array([-0.4726498 , -0.76955456,  0.11533247,  0.37093374], dtype=float32)]
[array([-0.4880409 , -0.5762439 ,  0.12275115,  0.11672673], dtype=float32)]
[array([-0.49956578, -0.38307503,  0.12508568, -0.13484807], dtype=float32)]
[array([-0.5072273 , -0.18994607,  0.12238871, -0.38559875], dtype=float32)]
[array([-0.5110262 , -0.38657364,  0.11467674, -0.05697109], dtype=float32)]
[array([-0.5187577 , -0.58313715,  0.11353732,  0.26957873], dtype=float32)]
[array([-0.5304204 , -0.77968085,  0.11892889,  0.59580433], dtype=float32)]
[array([-0.546014  , -0.58640647,  0.13084498,  0.34282336], dtype=float32)]
[array([-0.5577422 , -0.78312355,  0.13770145,  0.6737355 ], dtype=float32)]
[array([-0.5734046 , -0.97986346,  0.15117615,  1.0064083 ], dtype=float32)]
[array([-0.5930019 , -0.78704786,  0.17130433,  0.7647603 ], dtype=float32)]
[array([-0.6087429 , -0.59464675,  0.18659954,  0.5305003 ], dtype=float32)]
[array([-0.6206358 , -0.7918356 ,  0.19720954,  0.8756914 ], dtype=float32)]

Iteration 1:  32%|███▏      | 32/100 [00:00<00:01, 34.53it/s, episode=130, return=30.600]

[array([-0.04834603, -0.02863652, -0.04386608, -0.07292827], dtype=float32)]
[array([-0.04891876,  0.16708596, -0.04532465, -0.37912208], dtype=float32)]
[array([-0.04557704, -0.027364  , -0.05290709, -0.10106755], dtype=float32)]
[array([-0.04612432,  0.1684747 , -0.05492844, -0.409962  ], dtype=float32)]
[array([-0.04275483, -0.02582723, -0.06312767, -0.1350895 ], dtype=float32)]
[array([-0.04327137,  0.17013946, -0.06582947, -0.44700125], dtype=float32)]
[array([-0.03986858, -0.02399247, -0.07476949, -0.17577395], dtype=float32)]
[array([-0.04034843,  0.17211547, -0.07828497, -0.49107575], dtype=float32)]
[array([-0.03690612,  0.3682494 , -0.08810648, -0.80736786], dtype=float32)]
[array([-0.02954113,  0.17443833, -0.10425384, -0.5436487 ], dtype=float32)]
[array([-0.02605237,  0.37085897, -0.11512682, -0.8672751 ], dtype=float32)]
[array([-0.01863519,  0.17747597, -0.13247232, -0.6128934 ], dtype=float32)]
[array([-0.01508567,  0.3741761 , -0.1447302 , -0.9441916 ], dtype=float32)]

Iteration 1:  36%|███▌      | 36/100 [00:00<00:01, 35.50it/s, episode=130, return=30.600]

[array([-0.00318093,  0.40562686,  0.18890718,  0.01218464], dtype=float32)]
[array([0.00493161, 0.20836872, 0.18915087, 0.35801703], dtype=float32)]
[array([0.00909898, 0.01113204, 0.19631122, 0.7038746 ], dtype=float32)]
(array([ 0.03582685, -0.00428957, -0.03738278,  0.03519584], dtype=float32), {})
[array([ 0.03162467, -0.03644346,  0.03665265,  0.00189631], dtype=float32)]
[array([ 0.0308958 ,  0.15813419,  0.03669058, -0.27900067], dtype=float32)]
[array([ 0.03405849,  0.35271406,  0.03111056, -0.55988944], dtype=float32)]
[array([ 0.04111277,  0.15716957,  0.01991278, -0.2575696 ], dtype=float32)]
[array([ 0.04425616, -0.03823093,  0.01476138,  0.041327  ], dtype=float32)]
[array([ 0.04349154, -0.23356141,  0.01558792,  0.3386305 ], dtype=float32)]
[array([ 0.03882031, -0.42890167,  0.02236053,  0.6361879 ], dtype=float32)]
[array([ 0.03024228, -0.23409858,  0.03508429,  0.3506298 ], dtype=float32)]
[array([ 0.02556031, -0.42970145,  0.04209689,  0.6541662 ], dtype=float32)]
[ar

Iteration 1:  40%|████      | 40/100 [00:01<00:01, 36.33it/s, episode=140, return=34.200]

[array([ 0.02155983, -0.00702274,  0.13897254,  0.9776644 ], dtype=float32)]
[array([0.02141937, 0.18598987, 0.15852582, 0.73166335], dtype=float32)]
[array([ 0.02513917, -0.01092616,  0.1731591 ,  1.0697453 ], dtype=float32)]
[array([0.02492064, 0.1815361 , 0.19455399, 0.836024  ], dtype=float32)]
(array([-0.03929555,  0.00666783, -0.04040055, -0.03583977], dtype=float32), {})
[array([ 0.03009724, -0.02560897, -0.04387463,  0.01018249], dtype=float32)]
[array([ 0.02958506, -0.22007513, -0.04367098,  0.288706  ], dtype=float32)]
[array([ 0.02518356, -0.02435852, -0.03789686, -0.01742419], dtype=float32)]
[array([ 0.02469639,  0.17128585, -0.03824534, -0.32181904], dtype=float32)]
[array([ 0.0281221 , -0.02327119, -0.04468172, -0.04143828], dtype=float32)]
[array([ 0.02765668, -0.21772489, -0.04551049,  0.23681909], dtype=float32)]
[array([ 0.02330218, -0.02198328, -0.04077411, -0.06986477], dtype=float32)]
[array([ 0.02286252, -0.21649766, -0.0421714 ,  0.20968004], dtype=float32)]
[ar

Iteration 1:  44%|████▍     | 44/100 [00:01<00:01, 29.48it/s, episode=140, return=34.200]

(array([-0.04901326,  0.02838147, -0.01159982, -0.03933468], dtype=float32), {})
[array([ 0.00468348, -0.01300373,  0.0105858 , -0.04833279], dtype=float32)]
[array([ 0.00442341,  0.18196484,  0.00961914, -0.3376571 ], dtype=float32)]
[array([ 0.0080627 , -0.01329266,  0.002866  , -0.04195641], dtype=float32)]
[array([ 0.00779685, -0.20845559,  0.00202687,  0.25162938], dtype=float32)]
[array([ 0.00362774, -0.01336264,  0.00705946, -0.04041353], dtype=float32)]
[array([ 0.00336048,  0.18165737,  0.00625119, -0.3308608 ], dtype=float32)]
[array([ 6.993632e-03,  3.766898e-01, -3.660290e-04, -6.215659e-01],
      dtype=float32)]
[array([ 0.01452743,  0.18157294, -0.01279735, -0.32899824], dtype=float32)]
[array([ 0.01815889,  0.37687472, -0.01937731, -0.6256892 ], dtype=float32)]
[array([ 0.02569638,  0.18202855, -0.0318911 , -0.3391714 ], dtype=float32)]
[array([ 0.02933695, -0.01262544, -0.03867452, -0.05671328], dtype=float32)]
[array([ 0.02908444,  0.18302909, -0.03980879, -0.3613431 

Iteration 1:  48%|████▊     | 48/100 [00:01<00:01, 26.87it/s, episode=140, return=34.200]

[array([ 0.07591776,  0.15401681, -0.07277839, -0.5023564 ], dtype=float32)]
[array([ 0.0789981 , -0.04000792, -0.08282551, -0.2334677 ], dtype=float32)]
[array([ 0.07819793, -0.23385477, -0.08749487,  0.03198293], dtype=float32)]
[array([ 0.07352084, -0.42762014, -0.08685521,  0.2958285 ], dtype=float32)]
[array([ 0.06496844, -0.62140346, -0.08093864,  0.5599044 ], dtype=float32)]
[array([ 0.05254037, -0.4252443 , -0.06974056,  0.24285898], dtype=float32)]
[array([ 0.04403548, -0.61930436, -0.06488337,  0.5127544 ], dtype=float32)]
[array([ 0.0316494 , -0.42333144, -0.05462829,  0.20035115], dtype=float32)]
[array([ 0.02318277, -0.61763126, -0.05062126,  0.47531337], dtype=float32)]
[array([ 0.01083014, -0.8120032 , -0.041115  ,  0.75162154], dtype=float32)]
[array([-0.00540992, -0.6163391 , -0.02608257,  0.446289  ], dtype=float32)]
[array([-0.01773671, -0.42085806, -0.01715679,  0.14549977], dtype=float32)]
[array([-0.02615387, -0.22549465, -0.01424679, -0.15254605], dtype=float32)]

Iteration 1:  51%|█████     | 51/100 [00:01<00:01, 26.76it/s, episode=150, return=56.500]

[array([-0.07562006,  0.18777876,  0.03655251, -0.4270335 ], dtype=float32)]
[array([-0.07186449, -0.00784132,  0.02801184, -0.12305535], dtype=float32)]
[array([-0.07202131,  0.18686835,  0.02555073, -0.40677086], dtype=float32)]
[array([-0.06828395, -0.00860642,  0.01741531, -0.10614332], dtype=float32)]
[array([-0.06845607, -0.20397356,  0.01529245,  0.19198279], dtype=float32)]
[array([-0.07253554, -0.3993109 ,  0.01913211,  0.48945034], dtype=float32)]
[array([-0.08052176, -0.204464  ,  0.02892111,  0.202858  ], dtype=float32)]
[array([-0.08461104, -0.00976735,  0.03297827, -0.08056325], dtype=float32)]
[array([-0.08480639,  0.18486671,  0.03136701, -0.36266175], dtype=float32)]
[array([-0.08110905, -0.01068669,  0.02411377, -0.06025553], dtype=float32)]
[array([-0.08132279,  0.18408136,  0.02290866, -0.34523392], dtype=float32)]
[array([-0.07764116, -0.01135885,  0.01600398, -0.04541597], dtype=float32)]
[array([-0.07786833, -0.20670658,  0.01509566,  0.25227308], dtype=float32)]

Iteration 1:  54%|█████▍    | 54/100 [00:01<00:01, 25.17it/s, episode=150, return=56.500]

[array([ 0.01266102,  0.55475163,  0.18462782, -0.03591374], dtype=float32)]
[array([0.02375606, 0.35752818, 0.18390954, 0.30886632], dtype=float32)]
[array([0.03090662, 0.54961866, 0.19008687, 0.07935481], dtype=float32)]
[array([ 0.04189899,  0.7415794 ,  0.19167396, -0.14785028], dtype=float32)]
[array([ 0.05673058,  0.9335137 ,  0.18871695, -0.37447158], dtype=float32)]
[array([ 0.07540085,  0.7362826 ,  0.18122752, -0.02872346], dtype=float32)]
[array([ 0.09012651,  0.9284055 ,  0.18065305, -0.25919855], dtype=float32)]
[array([0.10869461, 0.73122627, 0.17546909, 0.08458073], dtype=float32)]
[array([ 0.12331914,  0.92345566,  0.1771607 , -0.14801216], dtype=float32)]
[array([0.14178826, 0.7262976 , 0.17420046, 0.19490926], dtype=float32)]
[array([0.15631421, 0.52916735, 0.17809863, 0.5370858 ], dtype=float32)]
[array([0.16689755, 0.3320474 , 0.18884036, 0.8801733 ], dtype=float32)]
[array([0.1735385 , 0.13493125, 0.20644382, 1.2257808 ], dtype=float32)]
(array([-0.0019093 , -0.030

Iteration 1:  57%|█████▋    | 57/100 [00:01<00:01, 24.67it/s, episode=150, return=56.500]

[array([ 0.03216251, -0.02303766,  0.06059799,  0.15986888], dtype=float32)]
[array([ 0.03170176,  0.17116675,  0.06379537, -0.11309813], dtype=float32)]
[array([ 0.0351251 ,  0.3653193 ,  0.06153341, -0.38499144], dtype=float32)]
[array([ 0.04243148,  0.5595162 ,  0.05383358, -0.657656  ], dtype=float32)]
[array([ 0.0536218 ,  0.36368784,  0.04068046, -0.34851965], dtype=float32)]
[array([ 0.06089556,  0.16801164,  0.03371006, -0.04329138], dtype=float32)]
[array([ 0.0642558 , -0.02757709,  0.03284423,  0.2598339 ], dtype=float32)]
[array([ 0.06370425, -0.22315215,  0.03804091,  0.5626927 ], dtype=float32)]
[array([ 0.05924121, -0.02858407,  0.04929477,  0.2822331 ], dtype=float32)]
[array([ 0.05866953, -0.22437322,  0.05493943,  0.5900469 ], dtype=float32)]
[array([ 0.05418206, -0.03006182,  0.06674036,  0.31516358], dtype=float32)]
[array([ 0.05358083, -0.22606784,  0.07304364,  0.6281252 ], dtype=float32)]
[array([ 0.04905947, -0.42212912,  0.08560614,  0.9428886 ], dtype=float32)]

Iteration 1:  60%|██████    | 60/100 [00:01<00:01, 25.85it/s, episode=160, return=47.300]

[array([-0.04628823, -0.05669849,  0.11685452,  0.17429093], dtype=float32)]
[array([-0.0474222 , -0.2532821 ,  0.12034034,  0.5014325 ], dtype=float32)]
[array([-0.05248785, -0.06004373,  0.130369  ,  0.24896595], dtype=float32)]
[array([-0.05368872, -0.25676313,  0.1353483 ,  0.579761  ], dtype=float32)]
[array([-0.05882398, -0.06377152,  0.14694352,  0.33259213], dtype=float32)]
[array([-0.06009941, -0.26064605,  0.15359537,  0.66776747], dtype=float32)]
[array([-0.06531233, -0.06795584,  0.16695072,  0.4271146 ], dtype=float32)]
[array([-0.06667145,  0.12445683,  0.17549302,  0.19136435], dtype=float32)]
[array([-0.06418231,  0.31669077,  0.1793203 , -0.04122715], dtype=float32)]
[array([-0.0578485 ,  0.5088488 ,  0.17849575, -0.27240425], dtype=float32)]
[array([-0.04767152,  0.7010344 ,  0.17304768, -0.503903  ], dtype=float32)]
[array([-0.03365083,  0.5039502 ,  0.1629696 , -0.16206942], dtype=float32)]
[array([-0.02357183,  0.30691585,  0.15972823,  0.17726721], dtype=float32)]

Iteration 1:  63%|██████▎   | 63/100 [00:02<00:01, 25.48it/s, episode=160, return=47.300]

(array([ 0.02602222, -0.01523423,  0.00815812,  0.03131231], dtype=float32), {})
[array([-0.03611542, -0.04185794, -0.00414148, -0.01896597], dtype=float32)]
[array([-0.03695257,  0.15332316, -0.0045208 , -0.3129527 ], dtype=float32)]
[array([-0.03388611, -0.0417341 , -0.01077985, -0.02169891], dtype=float32)]
[array([-0.03472079,  0.15354078, -0.01121383, -0.31776342], dtype=float32)]
[array([-0.03164998, -0.04141968, -0.0175691 , -0.02863792], dtype=float32)]
[array([-0.03247837,  0.15394975, -0.01814186, -0.3268119 ], dtype=float32)]
[array([-0.02939938,  0.34932524, -0.0246781 , -0.62516034], dtype=float32)]
[array([-0.02241287,  0.5447829 , -0.0371813 , -0.9255121 ], dtype=float32)]
[array([-0.01151721,  0.35018227, -0.05569154, -0.6447417 ], dtype=float32)]
[array([-0.00451357,  0.15587883, -0.06858638, -0.3701033 ], dtype=float32)]
[array([-0.00139599, -0.03820504, -0.07598844, -0.09981056], dtype=float32)]
[array([-0.00216009, -0.23216034, -0.07798465,  0.16796374], dtype=float

Iteration 1:  66%|██████▌   | 66/100 [00:02<00:01, 24.04it/s, episode=160, return=47.300]

[array([-0.04251378,  0.1692612 , -0.05040505, -0.36922613], dtype=float32)]
[array([-0.03912855, -0.0251097 , -0.05778958, -0.0928529 ], dtype=float32)]
[array([-0.03963075, -0.21935777, -0.05964663,  0.18105215], dtype=float32)]
[array([-0.0440179 , -0.41357774, -0.05602559,  0.45433828], dtype=float32)]
[array([-0.05228946, -0.2177102 , -0.04693882,  0.14453505], dtype=float32)]
[array([-0.05664366, -0.4121296 , -0.04404812,  0.422048  ], dtype=float32)]
[array([-0.06488626, -0.21641217, -0.03560716,  0.11581066], dtype=float32)]
[array([-0.0692145 , -0.4110063 , -0.03329095,  0.39705056], dtype=float32)]
[array([-0.07743462, -0.21542825, -0.02534994,  0.09406028], dtype=float32)]
[array([-0.08174319, -0.41017786, -0.02346873,  0.37863877], dtype=float32)]
[array([-0.08994675, -0.21473062, -0.01589596,  0.07864945], dtype=float32)]
[array([-0.09424136, -0.40962112, -0.01432297,  0.36627507], dtype=float32)]
[array([-0.10243378, -0.2142986 , -0.00699747,  0.06911052], dtype=float32)]

Iteration 1:  69%|██████▉   | 69/100 [00:02<00:01, 22.58it/s, episode=170, return=51.600]

[array([-0.04572129, -0.39592308,  0.01988369,  0.55829746], dtype=float32)]
[array([-0.05363975, -0.2010858 ,  0.03104964,  0.27194476], dtype=float32)]
[array([-0.05766147, -0.00642036,  0.03648854, -0.01078556], dtype=float32)]
[array([-0.05778988, -0.2020461 ,  0.03627283,  0.29318297], dtype=float32)]
[array([-0.0618308 , -0.39766592,  0.04213649,  0.5970816 ], dtype=float32)]
[array([-0.06978411, -0.20315816,  0.05407812,  0.31796336], dtype=float32)]
[array([-0.07384728, -0.00884647,  0.06043738,  0.04281313], dtype=float32)]
[array([-0.07402421, -0.20478065,  0.06129365,  0.3539356 ], dtype=float32)]
[array([-0.07811982, -0.40071818,  0.06837236,  0.665299  ], dtype=float32)]
[array([-0.08613419, -0.20661052,  0.08167834,  0.39490363], dtype=float32)]
[array([-0.09026639, -0.4027907 ,  0.08957642,  0.7121796 ], dtype=float32)]
[array([-0.09832221, -0.59903127,  0.10382   ,  1.0316606 ], dtype=float32)]
[array([-0.11030284, -0.4054321 ,  0.12445322,  0.7732937 ], dtype=float32)]

Iteration 1:  72%|███████▏  | 72/100 [00:02<00:01, 22.30it/s, episode=170, return=51.600]

[array([-0.03498691, -0.36876026,  0.0294745 ,  0.69408816], dtype=float32)]
[array([-0.04236212, -0.17405929,  0.04335627,  0.41082782], dtype=float32)]
[array([-0.0458433 ,  0.02042205,  0.05157282,  0.13212271], dtype=float32)]
[array([-0.04543486,  0.21476877,  0.05421527, -0.14385408], dtype=float32)]
[array([-0.04113949,  0.40907407,  0.0513382 , -0.41895232], dtype=float32)]
[array([-0.032958  ,  0.60343236,  0.04295915, -0.6950189 ], dtype=float32)]
[array([-0.02088936,  0.40774176,  0.02905877, -0.3891276 ], dtype=float32)]
[array([-0.01273452,  0.60243946,  0.02127622, -0.67250884], dtype=float32)]
[array([-0.00068573,  0.40702832,  0.00782604, -0.37320372], dtype=float32)]
[array([ 7.4548330e-03,  6.0203820e-01,  3.6196798e-04, -6.6340882e-01],
      dtype=float32)]
[array([ 0.0194956 ,  0.40691125, -0.01290621, -0.3706119 ], dtype=float32)]
[array([ 0.02763382,  0.21197501, -0.02031845, -0.08202624], dtype=float32)]
[array([ 0.03187332,  0.01715012, -0.02195897,  0.20417756

Iteration 1:  75%|███████▌  | 75/100 [00:02<00:01, 23.52it/s, episode=170, return=51.600]

[array([ 0.1101985 ,  0.06765603, -0.132233  , -0.46767822], dtype=float32)]
[array([ 0.11155162, -0.12537414, -0.14158656, -0.21942401], dtype=float32)]
[array([ 0.10904413,  0.07145792, -0.14597504, -0.55320513], dtype=float32)]
[array([ 0.1104733 , -0.1213451 , -0.15703915, -0.30984086], dtype=float32)]
[array([ 0.1080464 , -0.31392172, -0.16323596, -0.07051049], dtype=float32)]
[array([ 0.10176796, -0.11688164, -0.16464618, -0.40992013], dtype=float32)]
[array([ 0.09943033, -0.30933332, -0.17284457, -0.17333409], dtype=float32)]
[array([ 0.09324366, -0.5016144 , -0.17631125,  0.06022623], dtype=float32)]
[array([ 0.08321137, -0.6938278 , -0.17510673,  0.29250923], dtype=float32)]
[array([ 0.06933482, -0.4966979 , -0.16925655, -0.04988252], dtype=float32)]
[array([ 0.05940086, -0.2996043 , -0.1702542 , -0.3908225 ], dtype=float32)]
[array([ 0.05340877, -0.10252677, -0.17807065, -0.73197794], dtype=float32)]
[array([ 0.05135824, -0.29479975, -0.1927102 , -0.5002055 ], dtype=float32)]

Iteration 1:  79%|███████▉  | 79/100 [00:02<00:00, 23.51it/s, episode=180, return=49.600]

[array([0.28882432, 0.212353  , 0.07452749, 0.23513165], dtype=float32)]
[array([0.2930714 , 0.01624974, 0.07923011, 0.5503608 ], dtype=float32)]
[array([0.29339638, 0.21017466, 0.09023733, 0.28365526], dtype=float32)]
[array([0.29759988, 0.40390146, 0.09591044, 0.02074115], dtype=float32)]
[array([0.30567792, 0.20754422, 0.09632526, 0.3420778 ], dtype=float32)]
[array([0.3098288 , 0.4011732 , 0.10316682, 0.08125661], dtype=float32)]
[array([0.31785226, 0.20473526, 0.10479195, 0.4046241 ], dtype=float32)]
[array([0.32194695, 0.00829539, 0.11288443, 0.72841996], dtype=float32)]
[array([0.32211286, 0.20169088, 0.12745284, 0.47329068], dtype=float32)]
[array([0.3261467 , 0.00502108, 0.13691865, 0.8032723 ], dtype=float32)]
[array([0.3262471 , 0.19802669, 0.1529841 , 0.55660176], dtype=float32)]
[array([0.33020765, 0.00112542, 0.16411613, 0.89330626], dtype=float32)]
[array([ 0.33023015, -0.19579634,  0.18198225,  1.2327538 ], dtype=float32)]
[array([ 0.3263142 , -0.00341977,  0.20663732, 

Iteration 1:  81%|████████  | 81/100 [00:02<00:00, 24.61it/s, episode=180, return=49.600]

[array([-0.0304109 ,  0.16617233,  0.16712376,  0.38880157], dtype=float32)]
[array([-0.02708746, -0.03087853,  0.1748998 ,  0.7291657 ], dtype=float32)]
[array([-0.02770503, -0.22793113,  0.18948312,  1.071394  ], dtype=float32)]
(array([-0.01816373, -0.04470143,  0.04965039, -0.00761497], dtype=float32), {})
[array([ 0.01784851, -0.02147368, -0.03577821, -0.03064654], dtype=float32)]
[array([ 0.01741904, -0.21606477, -0.03639114,  0.25053674], dtype=float32)]
[array([ 0.01309774, -0.41064867, -0.03138041,  0.5315227 ], dtype=float32)]
[array([ 0.00488477, -0.21509969, -0.02074995,  0.22911933], dtype=float32)]
[array([ 0.00058277, -0.40991908, -0.01616757,  0.5151854 ], dtype=float32)]
[array([-0.00761561, -0.21457322, -0.00586386,  0.2174519 ], dtype=float32)]
[array([-0.01190707, -0.01936794, -0.00151482, -0.07707496], dtype=float32)]
[array([-0.01229443, -0.21446814, -0.00305632,  0.21512966], dtype=float32)]
[array([-0.01658379, -0.01930263,  0.00124627, -0.07851581], dtype=float

Iteration 1:  84%|████████▍ | 84/100 [00:03<00:00, 21.95it/s, episode=180, return=49.600]

[array([-0.310295  , -0.37954897, -0.06287597, -0.15514906], dtype=float32)]
[array([-0.31788597, -0.57371694, -0.06597895,  0.11705402], dtype=float32)]
[array([-0.3293603 , -0.76783454, -0.06363787,  0.38821313], dtype=float32)]
[array([-0.344717  , -0.57186973, -0.05587361,  0.07616402], dtype=float32)]
[array([-0.35615438, -0.76614803, -0.05435033,  0.3507084 ], dtype=float32)]
[array([-0.37147734, -0.9604566 , -0.04733616,  0.6257698 ], dtype=float32)]
[array([-0.39068648, -0.76470697, -0.03482076,  0.31856254], dtype=float32)]
[array([-0.40598062, -0.5691068 , -0.02844951,  0.01510496], dtype=float32)]
[array([-0.41736275, -0.76380944, -0.02814741,  0.2986777 ], dtype=float32)]
[array([-0.43263894, -0.9585191 , -0.02217386,  0.5823523 ], dtype=float32)]
[array([-0.45180932, -0.7630936 , -0.01052681,  0.2827675 ], dtype=float32)]
[array([-0.4670712 , -0.5678231 , -0.00487146, -0.01321684], dtype=float32)]
[array([-0.47842765, -0.37263164, -0.0051358 , -0.30743277], dtype=float32)]

Iteration 1:  87%|████████▋ | 87/100 [00:03<00:00, 20.23it/s, episode=180, return=49.600]

[array([0.5180849 , 0.88968873, 0.20872974, 0.7156752 ], dtype=float32)]
(array([-0.02986562,  0.04699284, -0.02037351,  0.02253639], dtype=float32), {})
[array([ 0.01877254,  0.04232768,  0.02650268, -0.0112295 ], dtype=float32)]
[array([ 0.0196191 , -0.15316413,  0.02627809,  0.28969613], dtype=float32)]
[array([0.01655582, 0.04157344, 0.03207202, 0.00541548], dtype=float32)]
[array([ 0.01738728, -0.15399344,  0.03218032,  0.3080425 ], dtype=float32)]
[array([0.01430742, 0.04065555, 0.03834118, 0.02567956], dtype=float32)]
[array([ 0.01512053,  0.23520727,  0.03885477, -0.25466403], dtype=float32)]
[array([0.01982467, 0.03955271, 0.03376149, 0.05001675], dtype=float32)]
[array([ 0.02061573,  0.2341747 ,  0.03476182, -0.23182587], dtype=float32)]
[array([ 0.02529922,  0.42878315,  0.0301253 , -0.5133443 ], dtype=float32)]
[array([ 0.03387488,  0.23325013,  0.01985842, -0.21132234], dtype=float32)]
[array([0.03853989, 0.03784996, 0.01563197, 0.08755812], dtype=float32)]
[array([ 0.0392

Iteration 1:  89%|████████▉ | 89/100 [00:03<00:00, 20.23it/s, episode=190, return=56.000]

[array([ 0.02246533, -0.15309705, -0.00137077,  0.31378776], dtype=float32)]
[array([0.01940338, 0.04204441, 0.00490499, 0.02067284], dtype=float32)]
[array([ 0.02024427, -0.15314753,  0.00531845,  0.3148993 ], dtype=float32)]
[array([0.01718132, 0.04189825, 0.01161643, 0.02389838], dtype=float32)]
[array([ 0.01801929, -0.15338835,  0.0120944 ,  0.32022366], dtype=float32)]
[array([ 0.01495152, -0.34868044,  0.01849887,  0.61669606], dtype=float32)]
[array([ 0.00797791, -0.15382174,  0.03083279,  0.32989636], dtype=float32)]
[array([0.00490148, 0.04084804, 0.03743072, 0.04709388], dtype=float32)]
[array([ 0.00571844, -0.15479009,  0.0383726 ,  0.3513476 ], dtype=float32)]
[array([0.00262264, 0.03976575, 0.04539955, 0.07100744], dtype=float32)]
[array([ 0.00341795,  0.23420842,  0.0468197 , -0.20701316], dtype=float32)]
[array([0.00810212, 0.03844932, 0.04267944, 0.10006367], dtype=float32)]
[array([ 0.00887111, -0.1572575 ,  0.04468071,  0.4059005 ], dtype=float32)]
[array([ 0.00572596

Iteration 1:  91%|█████████ | 91/100 [00:03<00:00, 22.50it/s, episode=190, return=56.000]

(array([-0.01031573, -0.01741138, -0.02535163,  0.02751144], dtype=float32), {})
[array([-0.01961862,  0.04146551, -0.03112792,  0.03949361], dtype=float32)]
[array([-0.01878931,  0.23701969, -0.03033805, -0.2628457 ], dtype=float32)]
[array([-0.01404891,  0.04234363, -0.03559496,  0.02011602], dtype=float32)]
[array([-0.01320204, -0.15225026, -0.03519264,  0.30135924], dtype=float32)]
[array([-0.01624705,  0.04335516, -0.02916546, -0.0022117 ], dtype=float32)]
[array([-0.01537994, -0.15133664, -0.02920969,  0.28112838], dtype=float32)]
[array([-0.01840668,  0.04418953, -0.02358712, -0.0206222 ], dtype=float32)]
[array([-0.01752288,  0.23964167, -0.02399957, -0.3206528 ], dtype=float32)]
[array([-0.01273005,  0.04486957, -0.03041262, -0.03563401], dtype=float32)]
[array([-0.01183266, -0.14980336, -0.0311253 ,  0.24730042], dtype=float32)]
[array([-0.01482873,  0.04574897, -0.0261793 , -0.05503537], dtype=float32)]
[array([-0.01391375, -0.14898801, -0.02728   ,  0.22927418], dtype=float

Iteration 1:  94%|█████████▍| 94/100 [00:03<00:00, 21.03it/s, episode=190, return=56.000]

[array([ 0.03072329,  0.17865393, -0.07137661, -0.6268235 ], dtype=float32)]
[array([ 0.03429637, -0.01540298, -0.08391308, -0.35744622], dtype=float32)]
[array([ 0.03398831, -0.20923795, -0.091062  , -0.09235834], dtype=float32)]
[array([ 0.02980355, -0.01293677, -0.09290917, -0.41232535], dtype=float32)]
[array([ 0.02954481,  0.18337095, -0.10115568, -0.732793  ], dtype=float32)]
[array([ 0.03321223, -0.01021861, -0.11581153, -0.4735821 ], dtype=float32)]
[array([ 0.03300786, -0.20353088, -0.12528318, -0.21952862], dtype=float32)]
[array([ 0.02893724, -0.39666006, -0.12967375,  0.03115765], dtype=float32)]
[array([ 0.02100404, -0.19993998, -0.1290506 , -0.29946235], dtype=float32)]
[array([ 0.01700524, -0.00323737, -0.13503985, -0.6298984 ], dtype=float32)]
[array([ 0.01694049, -0.19624226, -0.14763781, -0.3826079 ], dtype=float32)]
[array([ 1.3015648e-02,  6.3367392e-04, -1.5528998e-01, -7.1795720e-01],
      dtype=float32)]
[array([ 0.01302832, -0.19203736, -0.16964911, -0.4779003 

Iteration 1:  97%|█████████▋| 97/100 [00:03<00:00, 22.03it/s, episode=190, return=56.000]

[array([-0.05653562,  0.03828801, -0.16788273, -0.8544778 ], dtype=float32)]
[array([-0.05576986, -0.15419702, -0.18497229, -0.6189356 ], dtype=float32)]
[array([-0.05885381,  0.04296088, -0.197351  , -0.9636998 ], dtype=float32)]
(array([-0.02920137,  0.03923173, -0.00411653,  0.02674431], dtype=float32), {})
[array([ 0.04062356, -0.0106849 ,  0.04238844,  0.01804144], dtype=float32)]
[array([ 0.04040986, -0.20638831,  0.04274927,  0.32379133], dtype=float32)]
[array([ 0.0362821 , -0.01190031,  0.0492251 ,  0.04489034], dtype=float32)]
[array([ 0.03604409, -0.20769231,  0.05012291,  0.3526889 ], dtype=float32)]
[array([ 0.03189025, -0.01331761,  0.05717668,  0.07622299], dtype=float32)]
[array([ 0.03162389,  0.18094005,  0.05870114, -0.19788653], dtype=float32)]
[array([ 0.0352427 ,  0.3751754 ,  0.05474341, -0.4714895 ], dtype=float32)]
[array([ 0.0427462 ,  0.17932467,  0.04531362, -0.16206698], dtype=float32)]
[array([ 0.04633269, -0.0164157 ,  0.04207228,  0.14455985], dtype=float

Iteration 1: 100%|██████████| 100/100 [00:03<00:00, 26.18it/s, episode=200, return=53.900]


[array([0.43962488, 0.5644604 , 0.08858263, 0.53002983], dtype=float32)]
[array([0.45091408, 0.7582319 , 0.09918323, 0.2665215 ], dtype=float32)]
[array([0.46607873, 0.5618446 , 0.10451367, 0.5887668 ], dtype=float32)]
[array([0.4773156 , 0.36542633, 0.116289  , 0.91245764], dtype=float32)]
[array([0.48462415, 0.55879927, 0.13453816, 0.6584692 ], dtype=float32)]
[array([0.49580014, 0.7518176 , 0.14770754, 0.4109931 ], dtype=float32)]
[array([0.5108365 , 0.94457066, 0.1559274 , 0.1682801 ], dtype=float32)]
[array([ 0.5297279 ,  1.1371568 ,  0.159293  , -0.07143744], dtype=float32)]
[array([0.55247104, 0.94015235, 0.15786426, 0.26696247], dtype=float32)]
[array([0.5712741 , 0.743171  , 0.16320351, 0.60498   ], dtype=float32)]
[array([0.5861375 , 0.93567985, 0.1753031 , 0.36782292], dtype=float32)]
[array([0.60485107, 0.73855674, 0.18265957, 0.7102514 ], dtype=float32)]
[array([0.61962223, 0.9307424 , 0.19686459, 0.4801725 ], dtype=float32)]
[array([0.63823706, 0.73346573, 0.20646805, 0.8

Iteration 2:   0%|          | 0/100 [00:00<?, ?it/s]

(array([ 0.0245394 , -0.00459558,  0.01331436, -0.04929732], dtype=float32), {})
[array([0.0262811 , 0.04610293, 0.03452218, 0.03167114], dtype=float32)]
[array([ 0.02720316, -0.14949664,  0.03515561,  0.3350432 ], dtype=float32)]
[array([0.02421322, 0.0451078 , 0.04185647, 0.05365051], dtype=float32)]
[array([ 0.02511538,  0.23960537,  0.04292948, -0.2255381 ], dtype=float32)]
[array([0.02990749, 0.04389701, 0.03841872, 0.08037136], dtype=float32)]
[array([ 0.03078543, -0.15175402,  0.04002614,  0.38492367], dtype=float32)]
[array([0.02775035, 0.0427775 , 0.04772462, 0.10512464], dtype=float32)]
[array([ 0.0286059 ,  0.2371842 ,  0.04982711, -0.17212765], dtype=float32)]
[array([0.03334958, 0.04138581, 0.04638456, 0.13584852], dtype=float32)]
[array([ 0.0341773 , -0.15436879,  0.04910153,  0.44279677], dtype=float32)]
[array([0.03108992, 0.04002523, 0.05795746, 0.16598721], dtype=float32)]
[array([ 0.03189043, -0.1558764 ,  0.06127721,  0.47637624], dtype=float32)]
[array([ 0.0287729 

Iteration 2:   2%|▏         | 2/100 [00:00<00:05, 17.17it/s]

(array([-0.00264123,  0.02353595, -0.00108409, -0.03864576], dtype=float32), {})
[array([-0.04664647, -0.0151804 ,  0.03366305, -0.0029835 ], dtype=float32)]
[array([-0.04695008,  0.179443  ,  0.03360338, -0.28485814], dtype=float32)]
[array([-0.04336122, -0.01614169,  0.02790621,  0.01823081], dtype=float32)]
[array([-0.04368405,  0.17856917,  0.02827083, -0.2655184 ], dtype=float32)]
[array([-0.04011267, -0.01694464,  0.02296046,  0.03594563], dtype=float32)]
[array([-0.04045156, -0.21238819,  0.02367937,  0.33578348], dtype=float32)]
[array([-0.04469933, -0.0176111 ,  0.03039504,  0.05066082], dtype=float32)]
[array([-0.04505155,  0.17706214,  0.03140826, -0.23227938], dtype=float32)]
[array([-0.04151031, -0.0184942 ,  0.02676267,  0.07014295], dtype=float32)]
[array([-0.04188019,  0.17623405,  0.02816553, -0.21397753], dtype=float32)]
[array([-0.03835551, -0.01927902,  0.02388598,  0.08745533], dtype=float32)]
[array([-0.03874109,  0.17549255,  0.02563509, -0.19759688], dtype=float

Iteration 2:   4%|▍         | 4/100 [00:00<00:05, 16.88it/s]

[array([ 0.05205182, -0.3647652 , -0.20022547, -0.47205582], dtype=float32)]
(array([-0.00941785,  0.00686287,  0.00304713, -0.0397827 ], dtype=float32), {})
[array([ 0.03600405,  0.02138786, -0.03511687, -0.03897474], dtype=float32)]
[array([ 0.03643181,  0.21699533, -0.03589636, -0.34252727], dtype=float32)]
[array([ 0.04077172,  0.4126091 , -0.04274691, -0.64631015], dtype=float32)]
[array([ 0.0490239 ,  0.21810803, -0.05567311, -0.36738884], dtype=float32)]
[array([ 0.05338606,  0.02381956, -0.06302089, -0.09276711], dtype=float32)]
[array([ 0.05386245,  0.21978553, -0.06487623, -0.40464842], dtype=float32)]
[array([ 0.05825816,  0.02564074, -0.0729692 , -0.1331041 ], dtype=float32)]
[array([ 0.05877098,  0.22172794, -0.07563128, -0.4478861 ], dtype=float32)]
[array([ 0.06320553,  0.02775277, -0.084589  , -0.17996953], dtype=float32)]
[array([ 0.06376059, -0.16606326, -0.08818839,  0.08487551], dtype=float32)]
[array([ 0.06043933,  0.03020487, -0.08649088, -0.23427641], dtype=float

Iteration 2:   7%|▋         | 7/100 [00:00<00:04, 19.66it/s]

[array([ 0.07883192,  0.22211395,  0.01910982, -0.00978591], dtype=float32)]
[array([0.0832742 , 0.02672322, 0.0189141 , 0.2888646 ], dtype=float32)]
[array([0.08380867, 0.22157042, 0.02469139, 0.0022065 ], dtype=float32)]
[array([0.08824007, 0.02610322, 0.02473552, 0.30257645], dtype=float32)]
[array([ 0.08876213, -0.16936237,  0.03078705,  0.60295665], dtype=float32)]
[array([0.08537489, 0.02531575, 0.04284618, 0.32012782], dtype=float32)]
[array([0.0858812 , 0.21980214, 0.04924874, 0.04125889], dtype=float32)]
[array([0.09027725, 0.02400984, 0.05007392, 0.34906453], dtype=float32)]
[array([ 0.09075744, -0.1717872 ,  0.05705521,  0.65710753], dtype=float32)]
[array([0.0873217 , 0.02249607, 0.07019736, 0.382922  ], dtype=float32)]
[array([ 0.08777162, -0.17354871,  0.0778558 ,  0.696886  ], dtype=float32)]
[array([0.08430064, 0.02041218, 0.09179352, 0.42969248], dtype=float32)]
[array([0.08470889, 0.21412247, 0.10038737, 0.16729975], dtype=float32)]
[array([ 0.08899134,  0.40767488,  

Iteration 2:   9%|▉         | 9/100 [00:00<00:05, 17.29it/s]

[array([-0.06896215, -0.19694366, -0.10744745, -0.69443077], dtype=float32)]
[array([-0.07290102, -0.39042413, -0.12133607, -0.43741253], dtype=float32)]
[array([-0.0807095 , -0.5836384 , -0.13008432, -0.18530722], dtype=float32)]
[array([-0.09238227, -0.38691843, -0.13379046, -0.51603115], dtype=float32)]
[array([-0.10012064, -0.5799278 , -0.14411108, -0.26832148], dtype=float32)]
[array([-0.1117192 , -0.38307473, -0.14947751, -0.60276234], dtype=float32)]
[array([-0.11938069, -0.57582474, -0.16153276, -0.36064425], dtype=float32)]
[array([-0.13089718, -0.37881956, -0.16874565, -0.6995895 ], dtype=float32)]
[array([-0.13847357, -0.5712502 , -0.18273744, -0.46441954], dtype=float32)]
[array([-0.14989857, -0.37408012, -0.19202583, -0.8086766 ], dtype=float32)]
[array([-0.15738018, -0.5661253 , -0.20819935, -0.5820129 ], dtype=float32)]
(array([0.04520074, 0.0484615 , 0.0102641 , 0.04078381], dtype=float32), {})
[array([-0.01429568, -0.02253465,  0.0230172 ,  0.00588547], dtype=float32)]

Iteration 2:   9%|▉         | 9/100 [00:00<00:05, 17.29it/s, episode=210, return=63.900]

[array([-0.08001526, -0.1986483 , -0.00809072,  0.01889918], dtype=float32)]
[array([-0.08398822, -0.00341126, -0.00771274, -0.27632543], dtype=float32)]
[array([-0.08405645,  0.19181988, -0.01323924, -0.571431  ], dtype=float32)]
[array([-0.08022005,  0.38712496, -0.02466786, -0.86825514], dtype=float32)]
[array([-0.07247755,  0.19234715, -0.04203296, -0.5834289 ], dtype=float32)]
[array([-0.06863061,  0.38803196, -0.05370155, -0.88905096], dtype=float32)]
[array([-0.06086997,  0.19367829, -0.07148256, -0.6137214 ], dtype=float32)]
[array([-0.05699641, -0.00037581, -0.08375699, -0.34438157], dtype=float32)]
[array([-0.05700392,  0.19583157, -0.09064462, -0.66225666], dtype=float32)]
[array([-0.05308729,  0.00207992, -0.10388976, -0.39943537], dtype=float32)]
[array([-0.05304569, -0.19142666, -0.11187846, -0.14122917], dtype=float32)]
[array([-0.05687422,  0.00510501, -0.11470304, -0.46700734], dtype=float32)]
[array([-0.05677212, -0.18822537, -0.1240432 , -0.21256618], dtype=float32)]

Iteration 2:  11%|█         | 11/100 [00:00<00:05, 16.45it/s, episode=210, return=63.900]

[array([0.18645415, 0.5210999 , 0.06237264, 0.11007312], dtype=float32)]
[array([0.19687615, 0.32514223, 0.06457411, 0.42176378], dtype=float32)]
[array([0.20337899, 0.12916768, 0.07300938, 0.73408455], dtype=float32)]
[array([0.20596235, 0.3232091 , 0.08769108, 0.4652435 ], dtype=float32)]
[array([0.21242653, 0.5169896 , 0.09699594, 0.20143776], dtype=float32)]
[array([ 0.22276632,  0.7106002 ,  0.10102469, -0.05914091], dtype=float32)]
[array([0.23697832, 0.5141857 , 0.09984188, 0.26362935], dtype=float32)]
[array([0.24726205, 0.70775133, 0.10511447, 0.00403164], dtype=float32)]
[array([0.26141706, 0.5112912 , 0.1051951 , 0.32794207], dtype=float32)]
[array([0.2716429 , 0.70477057, 0.11175394, 0.07019772], dtype=float32)]
[array([ 0.2857383 ,  0.8981277 ,  0.1131579 , -0.18524142], dtype=float32)]
[array([0.30370086, 0.70158386, 0.10945307, 0.1408863 ], dtype=float32)]
[array([0.31773254, 0.5050784 , 0.11227079, 0.46599567], dtype=float32)]
[array([0.3278341 , 0.69844985, 0.1215907 ,

Iteration 2:  13%|█▎        | 13/100 [00:00<00:05, 15.37it/s, episode=210, return=63.900]

[array([ 0.02026742, -0.3926056 , -0.08393564,  0.371936  ], dtype=float32)]
[array([ 0.01241531, -0.58644104, -0.07649692,  0.6370175 ], dtype=float32)]
[array([ 0.00068649, -0.39034036, -0.06375657,  0.3212585 ], dtype=float32)]
[array([-0.00712032, -0.5844992 , -0.0573314 ,  0.59317327], dtype=float32)]
[array([-0.0188103 , -0.38862354, -0.04546794,  0.28299633], dtype=float32)]
[array([-0.02658277, -0.19288354, -0.03980801, -0.0236732 ], dtype=float32)]
[array([-0.03044044,  0.00278602, -0.04028147, -0.32864547], dtype=float32)]
[array([-0.03038472, -0.19174002, -0.04685438, -0.04893265], dtype=float32)]
[array([-0.03421953,  0.00402137, -0.04783304, -0.35602263], dtype=float32)]
[array([-0.0341391 , -0.19038902, -0.05495349, -0.07879815], dtype=float32)]
[array([-0.03794688, -0.38468188, -0.05652945,  0.19605301], dtype=float32)]
[array([-0.04564052, -0.5789516 , -0.05260839,  0.4703813 ], dtype=float32)]
[array([-0.05721955, -0.3831275 , -0.04320076,  0.16159183], dtype=float32)]

Iteration 2:  16%|█▌        | 16/100 [00:00<00:04, 18.43it/s, episode=210, return=63.900]

[array([ 0.08033794,  0.7884233 ,  0.0039624 , -0.99834186], dtype=float32)]
[array([ 0.09610641,  0.5932486 , -0.01600444, -0.70441717], dtype=float32)]
[array([ 0.10797138,  0.78858864, -0.03009278, -1.0020947 ], dtype=float32)]
[array([ 0.12374315,  0.98409945, -0.05013468, -1.3040742 ], dtype=float32)]
[array([ 0.14342514,  0.7896479 , -0.07621616, -1.0274967 ], dtype=float32)]
[array([ 0.1592181,  0.9856971, -0.0967661, -1.3431027], dtype=float32)]
[array([ 0.17893204,  0.79191655, -0.12362815, -1.0821966 ], dtype=float32)]
[array([ 0.19477038,  0.59862375, -0.14527208, -0.8307244 ], dtype=float32)]
[array([ 0.20674285,  0.40575433, -0.16188657, -0.58702755], dtype=float32)]
[array([ 0.21485794,  0.21322551, -0.17362712, -0.34939736], dtype=float32)]
[array([ 0.21912244,  0.02094321, -0.18061507, -0.11610073], dtype=float32)]
[array([ 0.21954131, -0.17119314, -0.18293709,  0.11460308], dtype=float32)]
[array([ 0.21611744, -0.36328682, -0.18064502,  0.34445155], dtype=float32)]
[ar

Iteration 2:  19%|█▉        | 19/100 [00:01<00:04, 19.79it/s, episode=210, return=63.900]

[array([-0.00238794, -0.3423034 ,  0.00821537,  0.5800729 ], dtype=float32)]
[array([-0.00923401, -0.14729753,  0.01981683,  0.28998923], dtype=float32)]
[array([-0.01217996,  0.04753632,  0.02561662,  0.0036216 ], dtype=float32)]
[array([-0.01122923, -0.14794347,  0.02568905,  0.30427554], dtype=float32)]
[array([-0.0141881 , -0.3434219 ,  0.03177456,  0.6049482 ], dtype=float32)]
[array([-0.02105654, -0.14875838,  0.04387352,  0.32244048], dtype=float32)]
[array([-0.0240317 , -0.34447676,  0.05032233,  0.62863016], dtype=float32)]
[array([-0.03092124, -0.54026353,  0.06289493,  0.9367271 ], dtype=float32)]
[array([-0.04172651, -0.34604356,  0.08162948,  0.6644521 ], dtype=float32)]
[array([-0.04864738, -0.1521463 ,  0.09491852,  0.39854708], dtype=float32)]
[array([-0.05169031,  0.04150995,  0.10288946,  0.13723446], dtype=float32)]
[array([-0.05086011,  0.23501924,  0.10563415, -0.12129825], dtype=float32)]
[array([-0.04615973,  0.4284816 ,  0.10320818, -0.37887555], dtype=float32)]

Iteration 2:  19%|█▉        | 19/100 [00:01<00:04, 19.79it/s, episode=220, return=57.700]

[array([ 0.02494474,  0.37376538, -0.00317878, -0.5096691 ], dtype=float32)]
[array([ 0.03242005,  0.17868835, -0.01337216, -0.21798961], dtype=float32)]
[array([ 0.03599382, -0.01623992, -0.01773195,  0.07044535], dtype=float32)]
[array([ 0.03566902,  0.1791317 , -0.01632304, -0.22777893], dtype=float32)]
[array([ 0.03925165, -0.01575322, -0.02087862,  0.05971075], dtype=float32)]
[array([ 0.03893659,  0.17966177, -0.01968441, -0.23948571], dtype=float32)]
[array([ 0.04252983, -0.01517353, -0.02447412,  0.04692377], dtype=float32)]
[array([ 0.04222635, -0.20993614, -0.02353565,  0.33178553], dtype=float32)]
[array([ 0.03802763, -0.01448723, -0.01689994,  0.03177451], dtype=float32)]
[array([ 0.03773789,  0.18087295, -0.01626444, -0.26619226], dtype=float32)]
[array([ 0.04135535,  0.3762232 , -0.02158829, -0.56396043], dtype=float32)]
[array([ 0.04887981,  0.18141071, -0.0328675 , -0.27815622], dtype=float32)]
[array([ 0.05250802,  0.37698576, -0.03843062, -0.58102167], dtype=float32)]

Iteration 2:  22%|██▏       | 22/100 [00:01<00:04, 17.22it/s, episode=220, return=57.700]

[array([0.74159366, 1.1095501 , 0.17927296, 0.21893735], dtype=float32)]
[array([0.76378465, 0.9123789 , 0.18365172, 0.5623786 ], dtype=float32)]
[array([0.78203225, 1.1045133 , 0.19489928, 0.33271414], dtype=float32)]
[array([0.8041225 , 1.2964047 , 0.20155357, 0.10726501], dtype=float32)]
[array([0.8300506 , 1.0990508 , 0.20369886, 0.4561596 ], dtype=float32)]
(array([ 0.04304458, -0.00782161, -0.03888594, -0.00804793], dtype=float32), {})
[array([-1.0318319e-02,  2.2126733e-06, -2.5072692e-02, -1.7376885e-02],
      dtype=float32)]
[array([-0.01031828,  0.1954746 , -0.02542023, -0.3178639 ], dtype=float32)]
[array([-0.00640878,  0.00072375, -0.03177751, -0.03330476], dtype=float32)]
[array([-0.00639431,  0.19628665, -0.0324436 , -0.335842  ], dtype=float32)]
[array([-0.00246858,  0.00164108, -0.03916044, -0.05356406], dtype=float32)]
[array([-0.00243575, -0.19289812, -0.04023172,  0.2265108 ], dtype=float32)]
[array([-0.00629372, -0.3874227 , -0.03570151,  0.5062366 ], dtype=float32

Iteration 2:  25%|██▌       | 25/100 [00:01<00:03, 19.58it/s, episode=220, return=57.700]

[array([-0.01136174,  0.02861595, -0.15770979, -0.5153164 ], dtype=float32)]
[array([-0.01078943, -0.16397461, -0.16801612, -0.27619046], dtype=float32)]
[array([-0.01406892,  0.03309615, -0.17353994, -0.6167987 ], dtype=float32)]
[array([-0.01340699, -0.15923116, -0.18587591, -0.38340598], dtype=float32)]
[array([-0.01659162, -0.35129488, -0.19354403, -0.15460628], dtype=float32)]
[array([-0.02361752, -0.54319495, -0.19663616,  0.07132196], dtype=float32)]
[array([-0.03448141, -0.73503417, -0.19520971,  0.29609627], dtype=float32)]
[array([-0.0491821 , -0.537743  , -0.1892878 , -0.05124767], dtype=float32)]
[array([-0.05993696, -0.34048223, -0.19031274, -0.3971726 ], dtype=float32)]
[array([-0.0667466 , -0.53246665, -0.1982562 , -0.17001012], dtype=float32)]
[array([-0.07739594, -0.33514103, -0.2016564 , -0.5181069 ], dtype=float32)]
(array([-0.0084711 , -0.04145597, -0.041793  ,  0.00870443], dtype=float32), {})
[array([-0.04958802,  0.02317211, -0.01314428, -0.02371743], dtype=float

Iteration 2:  28%|██▊       | 28/100 [00:01<00:03, 19.03it/s, episode=220, return=57.700]

[array([1.0793226 , 1.1065398 , 0.18048383, 0.14954868], dtype=float32)]
[array([1.1014534, 0.9093541, 0.1834748, 0.4932975], dtype=float32)]
[array([1.1196405 , 1.1014788 , 0.19334075, 0.26358697], dtype=float32)]
[array([1.14167   , 1.2933911 , 0.19861248, 0.03757074], dtype=float32)]
[array([1.1675378 , 1.0960575 , 0.1993639 , 0.38576114], dtype=float32)]
[array([1.189459  , 1.2878737 , 0.20707913, 0.16196135], dtype=float32)]
(array([ 0.0451043 , -0.04776286,  0.01269644, -0.04821337], dtype=float32), {})
[array([-0.01179974, -0.01874409, -0.0419658 ,  0.02831898], dtype=float32)]
[array([-0.01217462,  0.17695376, -0.04139942, -0.2773037 ], dtype=float32)]
[array([-0.00863555, -0.01755386, -0.04694549,  0.00203981], dtype=float32)]
[array([-0.00898662,  0.17820883, -0.0469047 , -0.3050776 ], dtype=float32)]
[array([-0.00542245, -0.01621441, -0.05300625, -0.02754842], dtype=float32)]
[array([-0.00574674,  0.17962603, -0.05355722, -0.33647305], dtype=float32)]
[array([-0.00215422,  0

Iteration 2:  29%|██▉       | 29/100 [00:01<00:03, 19.03it/s, episode=230, return=77.400]

[array([-0.28338   , -1.0824488 , -0.07653986,  0.581044  ], dtype=float32)]
[array([-0.305029  , -1.2764196 , -0.06491898,  0.8486675 ], dtype=float32)]
[array([-0.33055738, -1.0804751 , -0.04794563,  0.5362972 ], dtype=float32)]
[array([-0.3521669 , -0.884713  , -0.03721968,  0.22890079], dtype=float32)]
[array([-0.36986116, -1.0792838 , -0.03264167,  0.5096149 ], dtype=float32)]
[array([-0.39144683, -0.8837176 , -0.02244937,  0.20682684], dtype=float32)]
[array([-0.4091212 , -0.68828195, -0.01831284, -0.09285235], dtype=float32)]
[array([-0.42288682, -0.8831367 , -0.02016988,  0.1939971 ], dtype=float32)]
[array([-0.44054955, -0.6877321 , -0.01628994, -0.1049797 ], dtype=float32)]
[array([-0.4543042 , -0.8826169 , -0.01838953,  0.18251963], dtype=float32)]
[array([-0.47195652, -0.68723667, -0.01473914, -0.11590733], dtype=float32)]
[array([-0.48570126, -0.8821444 , -0.01705729,  0.17208931], dtype=float32)]
[array([-0.5033441 , -1.0770181 , -0.0136155 ,  0.45934275], dtype=float32)]

Iteration 2:  31%|███       | 31/100 [00:01<00:04, 16.03it/s, episode=230, return=77.400]

[array([ 0.08279566, -0.02411157, -0.09026457, -0.14550947], dtype=float32)]
[array([ 0.08231343,  0.17217931, -0.09317476, -0.46524888], dtype=float32)]
[array([ 0.08575701, -0.02151106, -0.10247974, -0.20332731], dtype=float32)]
[array([ 0.08532679, -0.21502951, -0.10654629,  0.05535109], dtype=float32)]
[array([ 0.0810262 , -0.01855396, -0.10543927, -0.26895636], dtype=float32)]
[array([ 0.08065512,  0.17790231, -0.11081839, -0.592946  ], dtype=float32)]
[array([ 0.08421317, -0.01550817, -0.12267731, -0.3371238 ], dtype=float32)]
[array([ 0.08390301, -0.20869026, -0.12941979, -0.08550519], dtype=float32)]
[array([ 0.0797292 , -0.01197355, -0.13112989, -0.4160558 ], dtype=float32)]
[array([ 0.07948973,  0.18473934, -0.13945101, -0.7470337 ], dtype=float32)]
[array([ 0.08318452, -0.0082114 , -0.15439168, -0.50128305], dtype=float32)]
[array([ 0.08302028, -0.20085852, -0.16441734, -0.26096064], dtype=float32)]
[array([ 0.07900312, -0.3932984 , -0.16963656, -0.02431492], dtype=float32)]

Iteration 2:  34%|███▍      | 34/100 [00:01<00:03, 17.24it/s, episode=230, return=77.400]

(array([ 0.01050263,  0.03698239, -0.04013811, -0.0460868 ], dtype=float32), {})
[array([ 0.0333731 , -0.00626746,  0.00506693, -0.0227285 ], dtype=float32)]
[array([ 0.03324775, -0.20146172,  0.00461236,  0.27154878], dtype=float32)]
[array([ 0.02921851, -0.00640588,  0.01004334, -0.01967582], dtype=float32)]
[array([ 0.0290904 ,  0.1885706 ,  0.00964982, -0.3091731 ], dtype=float32)]
[array([ 0.03286181,  0.38355374,  0.00346636, -0.5987972 ], dtype=float32)]
[array([ 0.04053288,  0.18838347, -0.00850958, -0.30502442], dtype=float32)]
[array([ 0.04430055,  0.38362566, -0.01461007, -0.6003789 ], dtype=float32)]
[array([ 0.05197306,  0.1887111 , -0.02661765, -0.31233346], dtype=float32)]
[array([ 0.05574729, -0.00602173, -0.03286432, -0.02816236], dtype=float32)]
[array([ 0.05562685,  0.18955572, -0.03342757, -0.33103052], dtype=float32)]
[array([ 0.05941797, -0.00507485, -0.04004818, -0.04907361], dtype=float32)]
[array([ 0.05931647, -0.19960034, -0.04102965,  0.23070964], dtype=float

Iteration 2:  36%|███▌      | 36/100 [00:02<00:04, 14.34it/s, episode=230, return=77.400]

(array([-0.03427485, -0.01044115, -0.0002013 , -0.00027474], dtype=float32), {})
[array([ 0.03197561, -0.00042106, -0.01975002, -0.02322449], dtype=float32)]
[array([ 0.03196719, -0.19525428, -0.02021451,  0.26316223], dtype=float32)]
[array([ 0.0280621 ,  0.00015028, -0.01495127, -0.0358274 ], dtype=float32)]
[array([ 0.02806511,  0.19548342, -0.01566781, -0.33318985], dtype=float32)]
[array([ 0.03197478,  0.00058793, -0.02233161, -0.04548866], dtype=float32)]
[array([ 0.03198653,  0.19602287, -0.02324139, -0.34513292], dtype=float32)]
[array([ 0.03590699,  0.00123911, -0.03014404, -0.05986854], dtype=float32)]
[array([ 0.03593177, -0.19343796, -0.03134141,  0.2231534 ], dtype=float32)]
[array([ 0.03206301,  0.0021176 , -0.02687835, -0.07924876], dtype=float32)]
[array([ 0.03210537, -0.19260894, -0.02846332,  0.20483425], dtype=float32)]
[array([ 0.02825319, -0.38731253, -0.02436664,  0.48840427], dtype=float32)]
[array([ 0.02050694, -0.19185542, -0.01459855,  0.18814255], dtype=float

Iteration 2:  38%|███▊      | 38/100 [00:02<00:04, 14.14it/s, episode=230, return=77.400]

[array([-0.00079974,  0.152008  ,  0.11706315,  0.06817067], dtype=float32)]
[array([ 0.00224042, -0.04458075,  0.11842656,  0.39537364], dtype=float32)]
[array([0.00134881, 0.14867924, 0.12633404, 0.14225018], dtype=float32)]
[array([ 0.00432239,  0.34178677,  0.12917905, -0.10805915], dtype=float32)]
[array([ 0.01115813,  0.5348437 ,  0.12701786, -0.3573586 ], dtype=float32)]
[array([ 0.021855  ,  0.33816636,  0.11987069, -0.02747633], dtype=float32)]
[array([ 0.02861833,  0.5313835 ,  0.11932116, -0.28006467], dtype=float32)]
[array([0.039246  , 0.33477947, 0.11371987, 0.04774174], dtype=float32)]
[array([ 0.04594159,  0.52810264,  0.1146747 , -0.20700835], dtype=float32)]
[array([ 0.05650364,  0.72141397,  0.11053453, -0.4614303 ], dtype=float32)]
[array([ 0.07093192,  0.9148143 ,  0.10130592, -0.7173294 ], dtype=float32)]
[array([ 0.08922821,  0.7184471 ,  0.08695934, -0.39455575], dtype=float32)]
[array([ 0.10359715,  0.5222058 ,  0.07906822, -0.07577225], dtype=float32)]
[array(

Iteration 2:  40%|████      | 40/100 [00:02<00:04, 14.64it/s, episode=240, return=82.800]

[array([-0.03020904, -0.13832499, -0.04221101,  0.11904035], dtype=float32)]
[array([-0.03297554, -0.33281755, -0.0398302 ,  0.39811295], dtype=float32)]
[array([-0.03963189, -0.13715383, -0.03186795,  0.09314317], dtype=float32)]
[array([-0.04237497,  0.05841005, -0.03000508, -0.20942129], dtype=float32)]
[array([-0.04120677,  0.25394788, -0.03419351, -0.5114162 ], dtype=float32)]
[array([-0.03612781,  0.44953442, -0.04442183, -0.8146754 ], dtype=float32)]
[array([-0.02713712,  0.255048  , -0.06071534, -0.53628933], dtype=float32)]
[array([-0.02203616,  0.06082995, -0.07144113, -0.2633384 ], dtype=float32)]
[array([-0.02081956, -0.1332034 , -0.07670789,  0.00598339], dtype=float32)]
[array([-0.02348363,  0.06293006, -0.07658823, -0.3098817 ], dtype=float32)]
[array([-0.02222503, -0.13102189, -0.08278586, -0.04230155], dtype=float32)]
[array([-0.02484547, -0.32486513, -0.0836319 ,  0.22315627], dtype=float32)]
[array([-0.03134277, -0.51869833, -0.07916877,  0.48833078], dtype=float32)]

Iteration 2:  42%|████▏     | 42/100 [00:02<00:03, 14.84it/s, episode=240, return=82.800]

[array([ 0.00748923, -0.11933757, -0.09981727, -0.30260825], dtype=float32)]
[array([ 0.00510248, -0.31290573, -0.10586943, -0.04299884], dtype=float32)]
[array([-0.00115564, -0.1164375 , -0.10672941, -0.3671187 ], dtype=float32)]
[array([-0.00348439, -0.30989376, -0.11407178, -0.10990439], dtype=float32)]
[array([-0.00968226, -0.11333769, -0.11626987, -0.43628675], dtype=float32)]
[array([-0.01194902, -0.30663833, -0.1249956 , -0.18239982], dtype=float32)]
[array([-0.01808178, -0.4997707 , -0.1286436 ,  0.06838545], dtype=float32)]
[array([-0.0280772 , -0.6928361 , -0.1272759 ,  0.31787354], dtype=float32)]
[array([-0.04193392, -0.49615285, -0.12091842, -0.01208313], dtype=float32)]
[array([-0.05185698, -0.68935186, -0.12116008,  0.24013586], dtype=float32)]
[array([-0.06564401, -0.8825534 , -0.11635736,  0.49227977], dtype=float32)]
[array([-0.08329508, -0.68599904, -0.10651177,  0.16530988], dtype=float32)]
[array([-0.09701506, -0.4895264 , -0.10320558, -0.15898426], dtype=float32)]

Iteration 2:  44%|████▍     | 44/100 [00:02<00:03, 15.29it/s, episode=240, return=82.800]

[array([-0.05203111,  0.01207819,  0.02797822,  0.00076687], dtype=float32)]
[array([-0.05178955, -0.1834336 ,  0.02799355,  0.30214432], dtype=float32)]
[array([-0.05545822,  0.01127842,  0.03403644,  0.01841973], dtype=float32)]
[array([-0.05523265, -0.1843147 ,  0.03440483,  0.32164428], dtype=float32)]
[array([-0.05891895, -0.37990928,  0.04083772,  0.6249755 ], dtype=float32)]
[array([-0.06651713, -0.1853805 ,  0.05333723,  0.34542862], dtype=float32)]
[array([-0.07022474,  0.00894375,  0.0602458 ,  0.07003046], dtype=float32)]
[array([-0.07004587, -0.18698786,  0.06164641,  0.38109624], dtype=float32)]
[array([-0.07378563,  0.00720702,  0.06926834,  0.10846979], dtype=float32)]
[array([-0.07364149,  0.2012715 ,  0.07143773, -0.16158025], dtype=float32)]
[array([-0.06961606,  0.00520343,  0.06820612,  0.15275657], dtype=float32)]
[array([-0.06951199, -0.19082549,  0.07126126,  0.46615332], dtype=float32)]
[array([-0.0733285 ,  0.00322104,  0.08058432,  0.19675533], dtype=float32)]

Iteration 2:  46%|████▌     | 46/100 [00:02<00:04, 13.39it/s, episode=240, return=82.800]

[array([ 0.08749167,  0.92779565,  0.12542735, -0.46489072], dtype=float32)]
[array([ 0.10604758,  1.1209428 ,  0.11612954, -0.71555644], dtype=float32)]
[array([ 0.12846643,  0.92442125,  0.10181841, -0.38869423], dtype=float32)]
[array([ 0.14695486,  0.72801256,  0.09404453, -0.06572415], dtype=float32)]
[array([0.16151512, 0.5316769 , 0.09273005, 0.25508633], dtype=float32)]
[array([ 0.17214864,  0.72536093,  0.09783177, -0.00696796], dtype=float32)]
[array([0.18665586, 0.528982  , 0.09769242, 0.31490862], dtype=float32)]
[array([0.19723551, 0.72258645, 0.10399058, 0.05456254], dtype=float32)]
[array([0.21168724, 0.526139  , 0.10508183, 0.37815997], dtype=float32)]
[array([0.22221002, 0.32969388, 0.11264504, 0.7020395 ], dtype=float32)]
[array([0.22880389, 0.52308923, 0.12668583, 0.4468327 ], dtype=float32)]
[array([0.23926568, 0.71621275, 0.13562247, 0.19661504], dtype=float32)]
[array([0.25358993, 0.51943785, 0.13955478, 0.5288179 ], dtype=float32)]
[array([0.2639787 , 0.3226569 ,

Iteration 2:  48%|████▊     | 48/100 [00:03<00:03, 13.61it/s, episode=240, return=82.800]

[array([-0.02740844,  0.18182853,  0.04055592, -0.17984076], dtype=float32)]
[array([-0.02377187,  0.37634736,  0.03695911, -0.45945892], dtype=float32)]
[array([-0.01624492,  0.18072301,  0.02776993, -0.15535927], dtype=float32)]
[array([-0.01263046, -0.01478532,  0.02466274,  0.14595346], dtype=float32)]
[array([-0.01292617,  0.17997493,  0.02758181, -0.13884795], dtype=float32)]
[array([-0.00932667, -0.01553099,  0.02480485,  0.16240741], dtype=float32)]
[array([-0.00963729,  0.17922723,  0.028053  , -0.12234819], dtype=float32)]
[array([-0.00605274, -0.01628516,  0.02560604,  0.17905152], dtype=float32)]
[array([-0.00637845, -0.211764  ,  0.02918707,  0.479701  ], dtype=float32)]
[array([-0.01061373, -0.40728557,  0.03878109,  0.78143823], dtype=float32)]
[array([-0.01875944, -0.21271753,  0.05440985,  0.5012044 ], dtype=float32)]
[array([-0.02301379, -0.40856254,  0.06443394,  0.8105258 ], dtype=float32)]
[array([-0.03118504, -0.6045053 ,  0.08064446,  1.1227605 ], dtype=float32)]

Iteration 2:  50%|█████     | 50/100 [00:03<00:04, 12.34it/s, episode=250, return=90.300]

[array([0.09733874, 0.31468603, 0.12465769, 0.08949458], dtype=float32)]
[array([ 0.10363246,  0.50782114,  0.12644759, -0.16140561], dtype=float32)]
[array([ 0.11378888,  0.70092744,  0.12321948, -0.4116758 ], dtype=float32)]
[array([ 0.12780742,  0.50429386,  0.11498596, -0.08282678], dtype=float32)]
[array([ 0.1378933 ,  0.6975958 ,  0.11332942, -0.33713356], dtype=float32)]
[array([ 0.15184522,  0.5010589 ,  0.10658675, -0.01097175], dtype=float32)]
[array([0.1618664 , 0.30458257, 0.10636731, 0.3133474 ], dtype=float32)]
[array([0.16795805, 0.49804115, 0.11263426, 0.05601261], dtype=float32)]
[array([ 0.17791887,  0.691383  ,  0.11375452, -0.199118  ], dtype=float32)]
[array([0.19174653, 0.4948334 , 0.10977215, 0.12717271], dtype=float32)]
[array([0.2016432 , 0.29832408, 0.11231561, 0.4523708 ], dtype=float32)]
[array([0.20760968, 0.4916934 , 0.12136302, 0.19709577], dtype=float32)]
[array([0.21744356, 0.29506344, 0.12530494, 0.5254645 ], dtype=float32)]
[array([0.22334482, 0.48821

Iteration 2:  52%|█████▏    | 52/100 [00:03<00:04, 10.38it/s, episode=250, return=90.300]

[array([0.44545183, 0.7517166 , 0.06886791, 0.18312632], dtype=float32)]
[array([0.46048617, 0.55568033, 0.07253043, 0.49671492], dtype=float32)]
[array([0.4715998 , 0.7497086 , 0.08246473, 0.22774294], dtype=float32)]
[array([0.48659393, 0.5535109 , 0.08701959, 0.54525656], dtype=float32)]
[array([0.49766415, 0.74730927, 0.09792472, 0.28121042], dtype=float32)]
[array([0.5126104 , 0.94090796, 0.10354893, 0.02094755], dtype=float32)]
[array([ 0.5314285 ,  1.1344042 ,  0.10396788, -0.23735319], dtype=float32)]
[array([0.5541166 , 0.9379624 , 0.09922082, 0.08622986], dtype=float32)]
[array([0.57287586, 0.74156857, 0.10094541, 0.40849486], dtype=float32)]
[array([0.5877072 , 0.9351252 , 0.10911531, 0.14926547], dtype=float32)]
[array([0.6064097 , 0.7386236 , 0.11210062, 0.47428113], dtype=float32)]
[array([0.6211822 , 0.54211193, 0.12158624, 0.80008715], dtype=float32)]
[array([0.6320244 , 0.34555072, 0.13758798, 1.1284107 ], dtype=float32)]
[array([0.63893545, 0.5386289 , 0.1601562 , 0.8

Iteration 2:  54%|█████▍    | 54/100 [00:03<00:04, 10.47it/s, episode=250, return=90.300]

[array([0.8036582 , 1.227466  , 0.18328933, 0.51976025], dtype=float32)]
[array([0.8282075 , 1.4195988 , 0.19368453, 0.28997436], dtype=float32)]
[array([0.85659945, 1.6115075 , 0.19948402, 0.06408168], dtype=float32)]
[array([0.88882965, 1.4141679 , 0.20076565, 0.41248122], dtype=float32)]
[array([0.917113  , 1.2168506 , 0.20901528, 0.7611415 ], dtype=float32)]
(array([-0.02259561,  0.00349015, -0.0169781 , -0.01919235], dtype=float32), {})
[array([-0.03017979, -0.01487033, -0.03343065,  0.00741042], dtype=float32)]
[array([-0.0304772 , -0.20949729, -0.03328244,  0.28936094], dtype=float32)]
[array([-0.03466715, -0.01391694, -0.02749522, -0.01363034], dtype=float32)]
[array([-0.03494549,  0.18158832, -0.02776783, -0.31485996], dtype=float32)]
[array([-0.03131372, -0.01312731, -0.03406502, -0.0310618 ], dtype=float32)]
[array([-0.03157626,  0.18246618, -0.03468626, -0.334295  ], dtype=float32)]
[array([-0.02792694, -0.01214538, -0.04137216, -0.05274897], dtype=float32)]
[array([-0.0281

Iteration 2:  56%|█████▌    | 56/100 [00:03<00:03, 11.27it/s, episode=250, return=90.300]

[array([-0.0848701 , -0.12764214, -0.08987168, -0.33892822], dtype=float32)]
[array([-0.08742294,  0.06863603, -0.09665025, -0.658544  ], dtype=float32)]
[array([-0.08605023, -0.1250173 , -0.10982113, -0.39779064], dtype=float32)]
[array([-0.08855057, -0.3184238 , -0.11777694, -0.14165111], dtype=float32)]
[array([-0.09491905, -0.12182927, -0.12060996, -0.46904644], dtype=float32)]
[array([-0.09735563,  0.07477161, -0.12999089, -0.79717886], dtype=float32)]
[array([-0.0958602 , -0.11834995, -0.14593446, -0.54805034], dtype=float32)]
[array([-0.0982272 , -0.31115288, -0.15689547, -0.30467266], dtype=float32)]
[array([-0.10445026, -0.11418357, -0.16298893, -0.64243585], dtype=float32)]
[array([-0.10673393, -0.30670363, -0.17583765, -0.4051899 ], dtype=float32)]
[array([-0.112868  , -0.10958097, -0.18394144, -0.74774486], dtype=float32)]
[array([-0.11505962, -0.3017534 , -0.19889633, -0.5181233 ], dtype=float32)]
[array([-0.12109469, -0.4936012 , -0.20925881, -0.2941157 ], dtype=float32)]

Iteration 2:  59%|█████▉    | 59/100 [00:04<00:04,  9.25it/s, episode=260, return=102.400]

[array([-0.7167484 , -0.08825618, -0.09310921, -0.9629809 ], dtype=float32)]
[array([-0.71851355, -0.28201208, -0.11236882, -0.70094055], dtype=float32)]
[array([-0.7241538 , -0.47541183, -0.12638763, -0.44563746], dtype=float32)]
[array([-0.733662  , -0.27874961, -0.13530038, -0.775337  ], dtype=float32)]
[array([-0.739237  , -0.47177687, -0.15080713, -0.5280993 ], dtype=float32)]
[array([-0.74867254, -0.6644913 , -0.1613691 , -0.286478  ], dtype=float32)]
[array([-0.7619624 , -0.8569882 , -0.16709867, -0.04872086], dtype=float32)]
[array([-0.77910215, -1.0493691 , -0.16807309,  0.18693236], dtype=float32)]
[array([-0.80008954, -0.85229105, -0.16433443, -0.15370025], dtype=float32)]
[array([-0.81713533, -0.6552442 , -0.16740844, -0.49338567], dtype=float32)]
[array([-0.83024025, -0.8476585 , -0.17727615, -0.25778762], dtype=float32)]
[array([-0.8471934 , -0.65050715, -0.1824319 , -0.60072654], dtype=float32)]
[array([-0.86020356, -0.8426715 , -0.19444644, -0.37060133], dtype=float32)]

Iteration 2:  61%|██████    | 61/100 [00:04<00:03, 11.32it/s, episode=260, return=102.400]

[array([-0.34172982, -0.7529288 , -0.08672492,  0.18045586], dtype=float32)]
[array([-0.3567884 , -0.5566798 , -0.08311581, -0.13827649], dtype=float32)]
[array([-0.36792198, -0.75051904, -0.08588134,  0.12707068], dtype=float32)]
[array([-0.38293236, -0.5542785 , -0.08333992, -0.19142337], dtype=float32)]
[array([-0.39401793, -0.3580693 , -0.08716839, -0.50918925], dtype=float32)]
[array([-0.4011793 , -0.551862  , -0.09735218, -0.24519892], dtype=float32)]
[array([-0.41221654, -0.3554941 , -0.10225616, -0.5669324 ], dtype=float32)]
[array([-0.41932642, -0.5490442 , -0.1135948 , -0.30813473], dtype=float32)]
[array([-0.43030733, -0.3525024 , -0.1197575 , -0.63437253], dtype=float32)]
[array([-0.43735737, -0.54576826, -0.13244495, -0.38167465], dtype=float32)]
[array([-0.44827273, -0.7387854 , -0.14007844, -0.13350973], dtype=float32)]
[array([-0.46304843, -0.5419639 , -0.14274864, -0.46689874], dtype=float32)]
[array([-0.4738877 , -0.73481095, -0.1520866 , -0.22239609], dtype=float32)]

Iteration 2:  63%|██████▎   | 63/100 [00:04<00:03, 12.18it/s, episode=260, return=102.400]

[array([-0.19451658, -0.02011136,  0.17727293,  0.40389845], dtype=float32)]
[array([-0.19491881,  0.17211184,  0.1853509 ,  0.17192805], dtype=float32)]
[array([-0.19147658,  0.36416408,  0.18878946, -0.05703434], dtype=float32)]
[array([-0.1841933 ,  0.16690744,  0.18764877,  0.28876838], dtype=float32)]
[array([-0.18085515,  0.3589272 ,  0.19342414,  0.06064049], dtype=float32)]
[array([-0.17367661,  0.161634  ,  0.19463694,  0.4075743 ], dtype=float32)]
[array([-0.17044392,  0.3535406 ,  0.20278843,  0.18201241], dtype=float32)]
[array([-0.16337311,  0.5452708 ,  0.20642868, -0.04047772], dtype=float32)]
[array([-0.1524677 ,  0.34787887,  0.20561913,  0.30958393], dtype=float32)]
(array([-0.02701491,  0.02102817,  0.03516591, -0.00460912], dtype=float32), {})
[array([ 0.04259085,  0.02166897, -0.04232169,  0.04404376], dtype=float32)]
[array([ 0.04302423, -0.17282136, -0.04144082,  0.32307917], dtype=float32)]
[array([ 0.03956781,  0.02286544, -0.03497924,  0.01762086], dtype=float

Iteration 2:  65%|██████▌   | 65/100 [00:04<00:02, 12.14it/s, episode=260, return=102.400]

[array([-0.0174953 ,  0.01807666, -0.02704135, -0.00760566], dtype=float32)]
[array([-0.01713377, -0.17664726, -0.02719346,  0.27642423], dtype=float32)]
[array([-0.02066671,  0.01885189, -0.02166498, -0.02470996], dtype=float32)]
[array([-0.02028967,  0.21427773, -0.02215918, -0.32414892], dtype=float32)]
[array([-0.01600412,  0.0194782 , -0.02864215, -0.03853562], dtype=float32)]
[array([-0.01561456, -0.17522156, -0.02941287,  0.24497458], dtype=float32)]
[array([-0.01911899, -0.36991134, -0.02451338,  0.5282368 ], dtype=float32)]
[array([-0.02651721, -0.56468   , -0.01394864,  0.8130958 ], dtype=float32)]
[array([-0.03781081, -0.3693698 ,  0.00231328,  0.5160582 ], dtype=float32)]
[array([-0.04519821, -0.1742805 ,  0.01263444,  0.22410516], dtype=float32)]
[array([-0.04868382, -0.36958072,  0.01711654,  0.5207465 ], dtype=float32)]
[array([-0.05607543, -0.17470385,  0.02753148,  0.23350608], dtype=float32)]
[array([-0.05956951, -0.37020814,  0.0322016 ,  0.5347446 ], dtype=float32)]

Iteration 2:  67%|██████▋   | 67/100 [00:04<00:03, 10.64it/s, episode=260, return=102.400]

[array([0.1341533 , 0.15138006, 0.11286379, 0.55101895], dtype=float32)]
[array([0.13718091, 0.34475097, 0.12388417, 0.29591957], dtype=float32)]
[array([0.14407593, 0.53790915, 0.12980257, 0.04473171], dtype=float32)]
[array([ 0.1548341 ,  0.73095393,  0.1306972 , -0.20434465], dtype=float32)]
[array([0.16945319, 0.5342287 , 0.12661031, 0.12654221], dtype=float32)]
[array([0.18013777, 0.33754173, 0.12914115, 0.45633516], dtype=float32)]
[array([0.18688859, 0.530624  , 0.13826786, 0.20698555], dtype=float32)]
[array([0.19750108, 0.33382356, 0.14240757, 0.5398902 ], dtype=float32)]
[array([0.20417754, 0.5266868 , 0.15320536, 0.29525033], dtype=float32)]
[array([0.21471128, 0.7193305 , 0.15911038, 0.05453582], dtype=float32)]
[array([ 0.22909789,  0.9118557 ,  0.16020109, -0.18402289], dtype=float32)]
[array([0.247335  , 0.7148474 , 0.15652063, 0.15460348], dtype=float32)]
[array([ 0.26163197,  0.90742236,  0.1596127 , -0.08489836], dtype=float32)]
[array([0.2797804 , 0.71041524, 0.15791

Iteration 2:  69%|██████▉   | 69/100 [00:04<00:02, 11.30it/s, episode=260, return=102.400]

[array([-0.0495341, -0.3839671, -0.022389 ,  0.2644672], dtype=float32)]
[array([-0.05721344, -0.5787625 , -0.01709966,  0.5500052 ], dtype=float32)]
[array([-0.06878869, -0.7736401 , -0.00609955,  0.8372519 ], dtype=float32)]
[array([-0.08426149, -0.57843536,  0.01064548,  0.54265696], dtype=float32)]
[array([-0.0958302 , -0.38346463,  0.02149862,  0.25334716], dtype=float32)]
[array([-0.10349949, -0.57888687,  0.02656557,  0.5527329 ], dtype=float32)]
[array([-0.11507723, -0.38414785,  0.03762022,  0.2685368 ], dtype=float32)]
[array([-0.12276018, -0.18958242,  0.04299096, -0.01204713], dtype=float32)]
[array([-0.12655184, -0.38529372,  0.04275002,  0.293884  ], dtype=float32)]
[array([-0.1342577 , -0.19080651,  0.0486277 ,  0.01498441], dtype=float32)]
[array([-0.13807385,  0.00358555,  0.04892739, -0.26196808], dtype=float32)]
[array([-0.13800213, -0.19219942,  0.04368803,  0.04573685], dtype=float32)]
[array([-0.14184612,  0.00226972,  0.04460276, -0.23284833], dtype=float32)]
[ar

Iteration 2:  69%|██████▉   | 69/100 [00:05<00:02, 11.30it/s, episode=270, return=110.500]

[array([ 1.6306149 ,  2.3694649 ,  0.00545609, -0.713994  ], dtype=float32)]
[array([ 1.6780041 ,  2.174268  , -0.00882379, -0.4195987 ], dtype=float32)]
[array([ 1.7214895 ,  2.3695138 , -0.01721576, -0.7150503 ], dtype=float32)]
[array([ 1.7688798 ,  2.1746342 , -0.03151677, -0.42783558], dtype=float32)]
[array([ 1.8123724 ,  1.9799726 , -0.04007348, -0.14525236], dtype=float32)]
[array([ 1.851972  ,  2.1756449 , -0.04297853, -0.45030355], dtype=float32)]
[array([ 1.8954848,  1.9811562, -0.0519846, -0.1714719], dtype=float32)]
[array([ 1.935108  ,  1.7868154 , -0.05541403,  0.10436901], dtype=float32)]
[array([ 1.9708443 ,  1.9826859 , -0.05332666, -0.20526914], dtype=float32)]
[array([ 2.010498  ,  1.7883655 , -0.05743204,  0.07012653], dtype=float32)]
[array([ 2.0462654 ,  1.9842618 , -0.05602951, -0.24010918], dtype=float32)]
[array([ 2.0859506 ,  2.1801374 , -0.06083169, -0.54992574], dtype=float32)]
[array([ 2.1295533 ,  1.9859204 , -0.07183021, -0.27701244], dtype=float32)]
[ar

Iteration 2:  71%|███████   | 71/100 [00:05<00:02, 10.91it/s, episode=270, return=110.500]

[array([ 0.07250258,  0.00151023, -0.06924701, -0.10346907], dtype=float32)]
[array([ 0.07253279,  0.19755271, -0.0713164 , -0.4171704 ], dtype=float32)]
[array([ 0.07648384,  0.00351007, -0.0796598 , -0.14779554], dtype=float32)]
[array([ 0.07655405,  0.19967705, -0.08261572, -0.46450812], dtype=float32)]
[array([ 0.08054758,  0.00581377, -0.09190588, -0.19896643], dtype=float32)]
[array([ 0.08066386, -0.1878817 , -0.09588521,  0.06336827], dtype=float32)]
[array([ 0.07690623,  0.00847491, -0.09461784, -0.2579608 ], dtype=float32)]
[array([ 0.07707572, -0.18517782, -0.09977706,  0.00344287], dtype=float32)]
[array([ 0.07337216, -0.37873778, -0.0997082 ,  0.26305294], dtype=float32)]
[array([ 0.06579741, -0.18234444, -0.09444714, -0.05933957], dtype=float32)]
[array([ 0.06215052, -0.3759942 , -0.09563393,  0.20211431], dtype=float32)]
[array([ 0.05463064, -0.17964388, -0.09159165, -0.1191389 ], dtype=float32)]
[array([ 0.05103776, -0.3733423 , -0.09397443,  0.14330058], dtype=float32)]

Iteration 2:  73%|███████▎  | 73/100 [00:05<00:02, 11.14it/s, episode=270, return=110.500]

[array([ 0.68002385,  1.6875012 ,  0.16602577, -0.14679618], dtype=float32)]
[array([ 0.71377385,  1.8799046 ,  0.16308986, -0.3828451 ], dtype=float32)]
[array([ 0.751372  ,  2.0723808 ,  0.15543295, -0.6199921 ], dtype=float32)]
[array([ 0.79281956,  2.2650297 ,  0.14303312, -0.85996807], dtype=float32)]
[array([ 0.83812016,  2.0682795 ,  0.12583375, -0.5259483 ], dtype=float32)]
[array([ 0.8794858 ,  1.8716325 ,  0.11531478, -0.1964123 ], dtype=float32)]
[array([ 0.9169184 ,  2.0649323 ,  0.11138654, -0.45060897], dtype=float32)]
[array([ 0.9582171 ,  1.8684258 ,  0.10237436, -0.12499585], dtype=float32)]
[array([ 0.99558556,  2.0619435 ,  0.09987444, -0.38370705], dtype=float32)]
[array([ 1.0368245 ,  1.8655559 ,  0.0922003 , -0.06127932], dtype=float32)]
[array([1.0741355 , 1.6692412 , 0.09097471, 0.2590104 ], dtype=float32)]
[array([ 1.1075203 ,  1.8629546 ,  0.09615492, -0.00364838], dtype=float32)]
[array([ 1.1447794 ,  2.0565755 ,  0.09608196, -0.26451206], dtype=float32)]
[ar

Iteration 2:  75%|███████▌  | 75/100 [00:05<00:02,  8.33it/s, episode=270, return=110.500]

[array([-1.4904635 , -0.88817436, -0.12039512, -0.5020862 ], dtype=float32)]
[array([-1.508227  , -1.0814118 , -0.13043684, -0.24963887], dtype=float32)]
[array([-1.5298553 , -0.88469183, -0.13542962, -0.5804521 ], dtype=float32)]
[array([-1.547549  , -1.0776821 , -0.14703867, -0.333312  ], dtype=float32)]
[array([-1.5691028 , -1.2704388 , -0.1537049 , -0.09037083], dtype=float32)]
[array([-1.5945115 , -1.073486  , -0.15551232, -0.42732853], dtype=float32)]
[array([-1.6159812 , -0.87654287, -0.1640589 , -0.76471484], dtype=float32)]
[array([-1.633512  , -1.0690714 , -0.17935318, -0.5278171 ], dtype=float32)]
[array([-1.6548935 , -0.87193954, -0.18990953, -0.87121916], dtype=float32)]
[array([-1.6723323 , -0.6748128 , -0.20733391, -1.2170916 ], dtype=float32)]
(array([-0.01269464,  0.00504219,  0.02070804,  0.01432306], dtype=float32), {})
[array([-0.04668038,  0.04396644, -0.00414801,  0.0204221 ], dtype=float32)]
[array([-0.04580105, -0.15109578, -0.00373957,  0.3117934 ], dtype=float

Iteration 2:  76%|███████▌  | 76/100 [00:05<00:02,  8.47it/s, episode=270, return=110.500]

[array([-1.2730712 , -1.3939976 , -0.09463301, -0.35273564], dtype=float32)]
[array([-1.3009511 , -1.1976662 , -0.10168771, -0.6736955 ], dtype=float32)]
[array([-1.3249044 , -1.3912388 , -0.11516163, -0.41468117], dtype=float32)]
[array([-1.3527292 , -1.5845562 , -0.12345525, -0.16040847], dtype=float32)]
[array([-1.3844203 , -1.387903  , -0.12666342, -0.489347  ], dtype=float32)]
[array([-1.4121784 , -1.191243  , -0.13645037, -0.8191133 ], dtype=float32)]
[array([-1.4360032 , -1.3842599 , -0.15283263, -0.57227105], dtype=float32)]
[array([-1.4636885 , -1.5769459 , -0.16427805, -0.33136913], dtype=float32)]
[array([-1.4952273 , -1.769395  , -0.17090543, -0.09466291], dtype=float32)]
[array([-1.5306152 , -1.9617078 , -0.1727987 ,  0.13960172], dtype=float32)]
[array([-1.5698494 , -2.153988  , -0.17000665,  0.37317625], dtype=float32)]
[array([-1.6129292 , -1.9569104 , -0.16254313,  0.03207998], dtype=float32)]
[array([-1.6520674 , -1.7598758 , -0.16190153, -0.30715358], dtype=float32)]

Iteration 2:  78%|███████▊  | 78/100 [00:05<00:02,  9.89it/s, episode=270, return=110.500]

(array([ 0.00576369, -0.02888989, -0.00790862,  0.02610484], dtype=float32), {})
[array([ 0.01065691, -0.01824898, -0.04071601, -0.01918237], dtype=float32)]
[array([ 0.01029193, -0.21276408, -0.04109966,  0.26038128], dtype=float32)]
[array([ 0.00603665, -0.01708026, -0.03589203, -0.04497649], dtype=float32)]
[array([ 0.00569505,  0.17853749, -0.03679156, -0.34876412], dtype=float32)]
[array([ 0.0092658 , -0.01604239, -0.04376685, -0.0679061 ], dtype=float32)]
[array([ 0.00894495,  0.17967883, -0.04512497, -0.37407014], dtype=float32)]
[array([ 0.01253853, -0.01477405, -0.05260637, -0.09594998], dtype=float32)]
[array([ 0.01224304, -0.20910409, -0.05452537,  0.1796827 ], dtype=float32)]
[array([ 0.00806096, -0.013246  , -0.05093171, -0.12969035], dtype=float32)]
[array([ 0.00779604,  0.18256715, -0.05352552, -0.43799666], dtype=float32)]
[array([ 0.01144738, -0.01175795, -0.06228545, -0.16265544], dtype=float32)]
[array([ 0.01121223,  0.1841978 , -0.06553856, -0.4743192 ], dtype=float

Iteration 2:  80%|████████  | 80/100 [00:06<00:02,  8.49it/s, episode=280, return=134.600]

[array([ 0.22728138,  0.01098008,  0.01088704, -0.00731507], dtype=float32)]
[array([ 0.22750099, -0.1842963 ,  0.01074074,  0.28878286], dtype=float32)]
[array([ 0.22381505,  0.01067086,  0.01651639, -0.00049328], dtype=float32)]
[array([ 0.22402848,  0.20555209,  0.01650653, -0.28791967], dtype=float32)]
[array([ 0.22813952,  0.40043482,  0.01074814, -0.57535124], dtype=float32)]
[array([ 2.3614821e-01,  5.9540445e-01, -7.5888855e-04, -8.6462891e-01],
      dtype=float32)]
[array([ 0.2480563 ,  0.40029284, -0.01805147, -0.5721847 ], dtype=float32)]
[array([ 0.25606215,  0.2054286 , -0.02949516, -0.28524277], dtype=float32)]
[array([ 0.26017073,  0.01073945, -0.03520001, -0.00200639], dtype=float32)]
[array([ 0.2603855 ,  0.20634808, -0.03524014, -0.30558425], dtype=float32)]
[array([ 0.26451248,  0.01174557, -0.04135183, -0.02422018], dtype=float32)]
[array([ 0.26474738, -0.18275972, -0.04183623,  0.25513437], dtype=float32)]
[array([ 0.2610922 ,  0.01293383, -0.03673355, -0.05044536

Iteration 2:  82%|████████▏ | 82/100 [00:06<00:01, 10.08it/s, episode=280, return=134.600]

[array([-0.38423944, -0.89114875, -0.09733025, -0.03289126], dtype=float32)]
[array([-0.40206242, -1.0847499 , -0.09798808,  0.22756621], dtype=float32)]
[array([-0.42375743, -0.88837415, -0.09343676, -0.09434688], dtype=float32)]
[array([-0.4415249 , -0.69204587, -0.09532369, -0.41498524], dtype=float32)]
[array([-0.45536584, -0.8856966 , -0.1036234 , -0.15381055], dtype=float32)]
[array([-0.47307977, -1.079194  , -0.10669961,  0.10446779], dtype=float32)]
[array([-0.49466363, -1.2726378 , -0.10461025,  0.3616733 ], dtype=float32)]
[array([-0.5201164 , -1.0761967 , -0.09737679,  0.03792288], dtype=float32)]
[array([-0.54164034, -0.8798229 , -0.09661833, -0.28382578], dtype=float32)]
[array([-0.55923676, -0.68346524, -0.10229485, -0.60535026], dtype=float32)]
[array([-0.5729061 , -0.87701917, -0.11440185, -0.34655935], dtype=float32)]
[array([-0.5904465 , -1.0703437 , -0.12133304, -0.09202887], dtype=float32)]
[array([-0.61185336, -1.2635366 , -0.12317362,  0.16004539], dtype=float32)]

Iteration 2:  84%|████████▍ | 84/100 [00:06<00:01, 11.72it/s, episode=280, return=134.600]

[array([-0.11055978, -0.70905554, -0.11489455,  0.3389448 ], dtype=float32)]
[array([-0.12474089, -0.9023711 , -0.10811566,  0.59330165], dtype=float32)]
[array([-0.14278832, -0.70591515, -0.09624963,  0.2686141 ], dtype=float32)]
[array([-0.15690662, -0.50956076, -0.09087734, -0.05280792], dtype=float32)]
[array([-0.16709784, -0.3132612 , -0.0919335 , -0.37272337], dtype=float32)]
[array([-0.17336306, -0.5069651 , -0.09938797, -0.11038492], dtype=float32)]
[array([-0.18350236, -0.7005328 , -0.10159566,  0.14936176], dtype=float32)]
[array([-0.19751301, -0.89406425, -0.09860843,  0.4083443 ], dtype=float32)]
[array([-0.2153943 , -0.6976926 , -0.09044155,  0.08627374], dtype=float32)]
[array([-0.22934815, -0.50139844, -0.08871607, -0.23351763], dtype=float32)]
[array([-0.23937613, -0.30512828, -0.09338642, -0.5528134 ], dtype=float32)]
[array([-0.24547869, -0.49882326, -0.10444269, -0.29095262], dtype=float32)]
[array([-0.25545517, -0.30237916, -0.11026175, -0.6146634 ], dtype=float32)]

Iteration 2:  86%|████████▌ | 86/100 [00:06<00:01, 10.26it/s, episode=280, return=134.600]

[array([ 1.5788966 ,  2.0050898 ,  0.06158191, -0.28885385], dtype=float32)]
[array([ 1.6189984 ,  2.199282  ,  0.05580483, -0.5614964 ], dtype=float32)]
[array([ 1.662984  ,  2.003423  ,  0.04457491, -0.25176728], dtype=float32)]
[array([ 1.7030525 ,  2.197881  ,  0.03953956, -0.5300639 ], dtype=float32)]
[array([ 1.7470101 ,  2.0022259 ,  0.02893828, -0.22518887], dtype=float32)]
[array([ 1.7870547 ,  2.1969225 ,  0.0244345 , -0.50860494], dtype=float32)]
[array([ 1.830993 ,  2.391692 ,  0.0142624, -0.7934887], dtype=float32)]
[array([ 1.8788270e+00,  2.5866151e+00, -1.6073700e-03, -1.0816509e+00],
      dtype=float32)]
[array([ 1.9305593 ,  2.3915145 , -0.02324039, -0.78947276], dtype=float32)]
[array([ 1.9783895 ,  2.5869477 , -0.03902984, -1.0893756 ], dtype=float32)]
[array([ 2.0301285 ,  2.7825618 , -0.06081735, -1.3940452 ], dtype=float32)]
[array([ 2.0857797 ,  2.5882473 , -0.08869826, -1.1209815 ], dtype=float32)]
[array([ 2.1375446 ,  2.3943934 , -0.11111789, -0.85738695], d

Iteration 2:  88%|████████▊ | 88/100 [00:06<00:01, 11.16it/s, episode=280, return=134.600]

[array([0.13228709, 0.5877666 , 0.1894999 , 0.18504317], dtype=float32)]
[array([ 0.14404242,  0.7797433 ,  0.19320077, -0.04238679], dtype=float32)]
[array([0.15963727, 0.5824515 , 0.19235303, 0.30449322], dtype=float32)]
[array([0.17128631, 0.38518327, 0.19844289, 0.65114194], dtype=float32)]
(array([-0.00530585,  0.02972227, -0.04345278,  0.01174856], dtype=float32), {})
[array([ 0.03144857, -0.01368414, -0.004955  , -0.0197304 ], dtype=float32)]
[array([ 0.03117488,  0.18150853, -0.00534961, -0.31397256], dtype=float32)]
[array([ 0.03480505,  0.37670627, -0.01162906, -0.60833776], dtype=float32)]
[array([ 0.04233918,  0.1817488 , -0.02379582, -0.31934023], dtype=float32)]
[array([ 0.04597415, -0.0130263 , -0.03018262, -0.03425558], dtype=float32)]
[array([ 0.04571363,  0.18251519, -0.03086773, -0.33630657], dtype=float32)]
[array([ 0.04936393,  0.37806252, -0.03759386, -0.6385615 ], dtype=float32)]
[array([ 0.05692518,  0.18348436, -0.05036509, -0.35795048], dtype=float32)]
[array(

Iteration 2:  90%|█████████ | 90/100 [00:07<00:00, 10.92it/s, episode=290, return=96.100] 

[array([-0.04650131,  0.3372944 ,  0.08249841, -0.08775894], dtype=float32)]
[array([-0.03975542,  0.5311429 ,  0.08074323, -0.35331568], dtype=float32)]
[array([-0.02913256,  0.33497116,  0.07367691, -0.03630408], dtype=float32)]
[array([-0.02243314,  0.13887425,  0.07295083,  0.27868474], dtype=float32)]
[array([-0.01965566,  0.3328838 ,  0.07852453,  0.00987296], dtype=float32)]
[array([-0.01299798,  0.13672864,  0.07872199,  0.32626152], dtype=float32)]
[array([-0.01026341, -0.05942072,  0.08524722,  0.6426947 ], dtype=float32)]
[array([-0.01145182,  0.13441607,  0.09810112,  0.3780277 ], dtype=float32)]
[array([-0.0087635 ,  0.3280179 ,  0.10566167,  0.11781676], dtype=float32)]
[array([-0.00220314,  0.13155316,  0.108018  ,  0.44187707], dtype=float32)]
[array([0.00042792, 0.32499403, 0.11685555, 0.18510248], dtype=float32)]
[array([ 0.0069278 ,  0.518267  ,  0.12055759, -0.06855241], dtype=float32)]
[array([0.01729314, 0.32164142, 0.11918655, 0.25960335], dtype=float32)]
[array(

Iteration 2:  92%|█████████▏| 92/100 [00:07<00:00,  9.64it/s, episode=290, return=96.100]

[array([-1.2479643 , -2.4094107 , -0.15695204,  0.60930395], dtype=float32)]
[array([-1.2961525 , -2.2124834 , -0.14476596,  0.27158898], dtype=float32)]
[array([-1.3404021 , -2.4052749 , -0.13933419,  0.51533854], dtype=float32)]
[array([-1.3885077 , -2.2084942 , -0.12902741,  0.18219621], dtype=float32)]
[array([-1.4326775 , -2.0117846 , -0.12538348, -0.14824416], dtype=float32)]
[array([-1.4729133 , -2.2049088 , -0.12834837,  0.10240188], dtype=float32)]
[array([-1.5170114 , -2.0082035 , -0.12630033, -0.22786015], dtype=float32)]
[array([-1.5571755 , -2.2013154 , -0.13085753,  0.02246689], dtype=float32)]
[array([-1.6012018 , -2.0045834 , -0.1304082 , -0.30847073], dtype=float32)]
[array([-1.6412935 , -2.1976295 , -0.1365776 , -0.05959185], dtype=float32)]
[array([-1.6852461 , -2.0008404 , -0.13776945, -0.39205563], dtype=float32)]
[array([-1.7252629 , -1.8040597 , -0.14561056, -0.7248061 ], dtype=float32)]
[array([-1.7613441 , -1.9969001 , -0.16010669, -0.481265  ], dtype=float32)]

Iteration 2:  94%|█████████▍| 94/100 [00:07<00:00, 10.87it/s, episode=290, return=96.100]

[array([-0.02850025,  0.22980103, -0.07350117, -0.6326289 ], dtype=float32)]
[array([-0.02390423,  0.03577728, -0.08615375, -0.363969  ], dtype=float32)]
[array([-0.02318868, -0.15802133, -0.09343313, -0.09964713], dtype=float32)]
[array([-0.02634911, -0.35168865, -0.09542607,  0.16215739], dtype=float32)]
[array([-0.03338289, -0.15533933, -0.09218292, -0.15903935], dtype=float32)]
[array([-0.03648967, -0.3490289 , -0.09536371,  0.10319804], dtype=float32)]
[array([-0.04347025, -0.542664  , -0.09329975,  0.36433652], dtype=float32)]
[array([-0.05432353, -0.3463485 , -0.08601302,  0.04375365], dtype=float32)]
[array([-0.0612505 , -0.5401385 , -0.08513794,  0.30810735], dtype=float32)]
[array([-0.07205327, -0.3439131 , -0.0789758 , -0.01016492], dtype=float32)]
[array([-0.07893153, -0.53781885, -0.07917909,  0.2565923 ], dtype=float32)]
[array([-0.08968791, -0.34166095, -0.07404725, -0.05997698], dtype=float32)]
[array([-0.09652112, -0.5356474 , -0.07524679,  0.20845534], dtype=float32)]

Iteration 2:  96%|█████████▌| 96/100 [00:07<00:00,  9.43it/s, episode=290, return=96.100]

[array([ 1.0423889 ,  0.7823322 , -0.12163068, -0.28646165], dtype=float32)]
[array([ 1.0580356 ,  0.5891359 , -0.12735991, -0.03447942], dtype=float32)]
[array([ 1.0698183 ,  0.7858323 , -0.12804951, -0.3644771 ], dtype=float32)]
[array([ 1.0855349 ,  0.5927406 , -0.13533905, -0.11475463], dtype=float32)]
[array([ 1.0973897 ,  0.78951603, -0.13763414, -0.44688705], dtype=float32)]
[array([ 1.11318   ,  0.98628926, -0.14657189, -0.77959245], dtype=float32)]
[array([ 1.1329058 ,  1.1830896 , -0.16216373, -1.1145656 ], dtype=float32)]
[array([ 1.1565677 ,  0.9904245 , -0.18445505, -0.8768271 ], dtype=float32)]
[array([ 1.1763761 ,  0.79822385, -0.20199159, -0.6473395 ], dtype=float32)]
(array([-0.02090489, -0.00784134, -0.03206561,  0.03813954], dtype=float32), {})
[array([ 0.04335821,  0.01417633,  0.03882749, -0.01304094], dtype=float32)]
[array([ 0.04364174,  0.20872055,  0.03856667, -0.29322493], dtype=float32)]
[array([ 0.04781615,  0.40327203,  0.03270217, -0.5734993 ], dtype=float

Iteration 2:  98%|█████████▊| 98/100 [00:08<00:00,  8.96it/s, episode=290, return=96.100]

[array([0.01952871, 0.03272912, 0.01961307, 0.0119397 ], dtype=float32)]
[array([ 0.0201833 , -0.16266854,  0.01985186,  0.3107457 ], dtype=float32)]
[array([0.01692992, 0.03216504, 0.02606677, 0.02438897], dtype=float32)]
[array([ 0.01757323, -0.16332084,  0.02655455,  0.32518095], dtype=float32)]
[array([ 0.01430681, -0.35881063,  0.03305817,  0.62611836], dtype=float32)]
[array([ 0.0071306 , -0.55437803,  0.04558054,  0.92902654], dtype=float32)]
[array([-0.00395697, -0.3599001 ,  0.06416107,  0.6510086 ], dtype=float32)]
[array([-0.01115497, -0.16572766,  0.07718124,  0.3791997 ], dtype=float32)]
[array([-0.01446952, -0.36185598,  0.08476524,  0.6951851 ], dtype=float32)]
[array([-0.02170664, -0.16800562,  0.09866894,  0.43034476], dtype=float32)]
[array([-0.02506675,  0.02559077,  0.10727584,  0.1703242 ], dtype=float32)]
[array([-0.02455494,  0.21902677,  0.11068232, -0.0866832 ], dtype=float32)]
[array([-0.0201744 ,  0.41240248,  0.10894866, -0.34249765], dtype=float32)]
[array(

Iteration 2: 100%|██████████| 100/100 [00:08<00:00, 12.23it/s, episode=300, return=120.200]


[array([ 1.2395344 ,  0.8669687 , -0.16554034,  0.19578134], dtype=float32)]
[array([ 1.2568737,  0.6745538, -0.1616247,  0.4320111], dtype=float32)]
[array([ 1.2703648 ,  0.871551  , -0.15298449,  0.09305549], dtype=float32)]
[array([ 1.2877958 ,  0.678915  , -0.15112337,  0.33383244], dtype=float32)]
[array([ 1.3013741 ,  0.8758285 , -0.14444673, -0.00243269], dtype=float32)]
[array([ 1.3188907 ,  0.68304175, -0.14449538,  0.24141607], dtype=float32)]
[array([ 1.3325515 ,  0.8799004 , -0.13966706, -0.0931286 ], dtype=float32)]
[array([ 1.3501495 ,  0.6870279 , -0.14152963,  0.15243429], dtype=float32)]
[array([ 1.36389   ,  0.8838628 , -0.13848095, -0.18133692], dtype=float32)]
[array([ 1.3815674 ,  0.69096607, -0.14210768,  0.06465478], dtype=float32)]
[array([ 1.3953867 ,  0.8878092 , -0.14081459, -0.26927277], dtype=float32)]
[array([ 1.4131428 ,  0.69494826, -0.14620005, -0.02410748], dtype=float32)]
[array([ 1.4270418 ,  0.89183134, -0.14668219, -0.3591114 ], dtype=float32)]
[ar

Iteration 3:   0%|          | 0/100 [00:00<?, ?it/s]

(array([-0.02584637,  0.04471739,  0.00231916,  0.02110547], dtype=float32), {})
[array([ 0.0017123 , -0.03360755, -0.04182786, -0.04658482], dtype=float32)]
[array([ 0.00104015, -0.22810553, -0.04275955,  0.23261318], dtype=float32)]
[array([-0.00352196, -0.03239951, -0.03810729, -0.073245  ], dtype=float32)]
[array([-0.00416995,  0.16324745, -0.03957219, -0.37770322], dtype=float32)]
[array([-0.000905  , -0.0312908 , -0.04712625, -0.09775548], dtype=float32)]
[array([-0.00153082,  0.16447379, -0.04908136, -0.40492633], dtype=float32)]
[array([ 0.00175866, -0.02991901, -0.05717989, -0.1281124 ], dtype=float32)]
[array([ 0.00116028,  0.16597347, -0.05974214, -0.43827263], dtype=float32)]
[array([ 0.00447975, -0.02825426, -0.06850759, -0.16500455], dtype=float32)]
[array([ 0.00391466, -0.22233206, -0.07180768,  0.10530379], dtype=float32)]
[array([-0.00053198, -0.4163554 , -0.0697016 ,  0.37449548], dtype=float32)]
[array([-0.00885909, -0.22031617, -0.0622117 ,  0.06067498], dtype=float

Iteration 3:   1%|          | 1/100 [00:00<00:19,  5.06it/s]

[array([-1.5764283 , -0.5732653 , -0.09066878, -0.17741428], dtype=float32)]
[array([-1.5878936 , -0.37697065, -0.09421707, -0.4972669 ], dtype=float32)]
[array([-1.595433  , -0.18065532, -0.1041624 , -0.81809276], dtype=float32)]
[array([-1.5990461 , -0.374209  , -0.12052426, -0.5599042 ], dtype=float32)]
[array([-1.6065303 , -0.56745154, -0.13172235, -0.3074916 ], dtype=float32)]
[array([-1.6178794 , -0.7604746 , -0.13787217, -0.0590796 ], dtype=float32)]
[array([-1.6330888 , -0.5636728 , -0.13905376, -0.39188716], dtype=float32)]
[array([-1.6443623 , -0.7565757 , -0.1468915 , -0.14607593], dtype=float32)]
[array([-1.6594938 , -0.559689  , -0.14981303, -0.48125657], dtype=float32)]
[array([-1.6706876 , -0.36280513, -0.15943816, -0.8171548 ], dtype=float32)]
[array([-1.6779437 , -0.55542725, -0.17578126, -0.5785628 ], dtype=float32)]
[array([-1.6890522 , -0.35833406, -0.18735251, -0.92106074], dtype=float32)]
[array([-1.6962188 , -0.5504979 , -0.20577373, -0.692619  ], dtype=float32)]

Iteration 3:   2%|▏         | 2/100 [00:00<00:16,  5.83it/s]

[array([-1.3678458 , -0.51394945, -0.09837305, -0.70457435], dtype=float32)]
[array([-1.3781247 , -0.7075806 , -0.11246453, -0.44440693], dtype=float32)]
[array([-1.3922764 , -0.511062  , -0.12135267, -0.77031744], dtype=float32)]
[array([-1.4024975 , -0.70432353, -0.13675901, -0.51814735], dtype=float32)]
[array([-1.416584  , -0.5075679 , -0.14712197, -0.85060704], dtype=float32)]
[array([-1.4267354 , -0.7004105 , -0.1641341 , -0.60756606], dtype=float32)]
[array([-1.4407437 , -0.50342035, -0.17628543, -0.9471191 ], dtype=float32)]
[array([-1.450812  , -0.6957867 , -0.19522782, -0.7146014 ], dtype=float32)]
(array([-0.01942972,  0.02636002,  0.01838975,  0.01431589], dtype=float32), {})
[array([-0.04860502,  0.03164744,  0.01187362,  0.01065635], dtype=float32)]
[array([-0.04797207,  0.22659712,  0.01208675, -0.27825677], dtype=float32)]
[array([-0.04344013,  0.03130484,  0.00652161,  0.01821365], dtype=float32)]
[array([-0.04281403, -0.16391003,  0.00688589,  0.31294706], dtype=float

Iteration 3:   3%|▎         | 3/100 [00:00<00:15,  6.07it/s]

[array([ 1.0274358 ,  0.9557955 , -0.04462154, -0.31795713], dtype=float32)]
[array([ 1.0465517 ,  1.1515237 , -0.05098068, -0.6243715 ], dtype=float32)]
[array([ 1.0695821 ,  0.95714915, -0.06346811, -0.34817034], dtype=float32)]
[array([ 1.0887251 ,  1.1531137 , -0.07043152, -0.66017246], dtype=float32)]
[array([ 1.1117874 ,  0.9590389 , -0.08363497, -0.39047176], dtype=float32)]
[array([ 1.1309682 ,  0.76519734, -0.0914444 , -0.12528579], dtype=float32)]
[array([ 1.1462722 ,  0.57149637, -0.09395012,  0.13720457], dtype=float32)]
[array([ 1.1577021 ,  0.7678296 , -0.09120602, -0.18357728], dtype=float32)]
[array([ 1.1730586 ,  0.574123  , -0.09487757,  0.0789975 ], dtype=float32)]
[array([ 1.1845411 ,  0.770468  , -0.09329762, -0.24204753], dtype=float32)]
[array([ 1.1999505 ,  0.9667902 , -0.09813857, -0.5626399 ], dtype=float32)]
[array([ 1.2192863 ,  0.77317244, -0.10939137, -0.3024171 ], dtype=float32)]
[array([ 1.2347497 ,  0.5797658 , -0.11543971, -0.04613812], dtype=float32)]

Iteration 3:   5%|▌         | 5/100 [00:00<00:12,  7.66it/s]

[array([ 0.60618055,  1.4405813 ,  0.01197403, -0.3741386 ], dtype=float32)]
[array([ 0.6349922 ,  1.6355312 ,  0.00449125, -0.6630221 ], dtype=float32)]
[array([ 0.6677028 ,  1.440347  , -0.00876919, -0.3689284 ], dtype=float32)]
[array([ 0.6965098 ,  1.2453507 , -0.01614776, -0.07902338], dtype=float32)]
[array([ 0.7214168 ,  1.4407004 , -0.01772822, -0.3767569 ], dtype=float32)]
[array([ 0.7502308 ,  1.6360697 , -0.02526336, -0.67497647], dtype=float32)]
[array([ 0.7829522 ,  1.4413077 , -0.03876289, -0.39035347], dtype=float32)]
[array([ 0.8117783 ,  1.2467567 , -0.04656996, -0.11013965], dtype=float32)]
[array([ 0.83671343,  1.442514  , -0.04877276, -0.41714382], dtype=float32)]
[array([ 0.86556375,  1.638292  , -0.05711563, -0.7247951 ], dtype=float32)]
[array([ 0.89832956,  1.8341553 , -0.07161153, -1.0348939 ], dtype=float32)]
[array([ 0.9350127 ,  1.6400548 , -0.09230941, -0.76552516], dtype=float32)]
[array([ 0.9678138 ,  1.446317  , -0.10761991, -0.5032561 ], dtype=float32)]

Iteration 3:   5%|▌         | 5/100 [00:00<00:16,  5.91it/s]

[array([ 0.46594086,  0.39793703,  0.00349285, -0.04052249], dtype=float32)]
[array([ 0.4738996 ,  0.5930087 ,  0.0026824 , -0.33210135], dtype=float32)]
[array([ 0.4857598 ,  0.39784867, -0.00395963, -0.03857373], dtype=float32)]
[array([ 0.49371678,  0.20278373, -0.0047311 ,  0.25285727], dtype=float32)]
[array([ 4.9777243e-01,  3.9797291e-01,  3.2604265e-04, -4.1314170e-02],
      dtype=float32)]
[array([ 5.057319e-01,  5.930902e-01, -5.002407e-04, -3.338942e-01],
      dtype=float32)]
[array([ 0.5175937 ,  0.39797536, -0.00717812, -0.04136907], dtype=float32)]
[array([ 0.5255532 ,  0.20295708, -0.00800551,  0.24904047], dtype=float32)]
[array([ 0.52961236,  0.39819244, -0.0030247 , -0.04615678], dtype=float32)]
[array([ 0.5375762 ,  0.5933576 , -0.00394783, -0.3397925 ], dtype=float32)]
[array([ 0.54944336,  0.78853554, -0.01074368, -0.6337177 ], dtype=float32)]
[array([ 0.56521404,  0.5935651 , -0.02341804, -0.3444375 ], dtype=float32)]
[array([ 0.5770854 ,  0.39878395, -0.0303067




KeyboardInterrupt: 