### Import Dependencies

In [1]:
import gymnasium as gym
from keras.models import Model
from keras.layers import Input, Dense, Concatenate

from agent_redq import AgentREDQ

### Create Policy Network

In [2]:
# Policy (actor) network: state vector -> (means, stddevs), one pair per
# action dimension.
# NOTE(review): 11 state inputs / 3 action outputs presumably match the
# Hopper-v4 environment used in the training cell — confirm if the env changes.

# `shape` must be a tuple; `(11)` is just the int 11, hence the trailing comma.
state_input = Input(shape=(11,))

hidden_units = 256

# Two fully connected ReLU layers shared by both output heads.
x = Dense(units=hidden_units, activation="relu")(state_input)
x = Dense(units=hidden_units, activation="relu")(x)

# Unbounded linear head for the means; softplus keeps the stddev head positive.
means = Dense(units=3)(x)
stddevs = Dense(units=3, activation="softplus")(x)

policy_network = Model(inputs=[state_input], outputs=[means, stddevs])

### Create Critic Network

In [3]:
# Critic (Q) network: (state, action) -> a single scalar output.
# NOTE(review): 11 state / 3 action dimensions presumably match the
# Hopper-v4 environment used in the training cell — confirm if the env changes.

# `shape` must be a tuple; `(11)` / `(3)` are plain ints, hence the trailing commas.
state_input = Input(shape=(11,))
action_input = Input(shape=(3,))

# Named `critic_input` rather than `input` so the Python builtin `input()`
# is not shadowed for the rest of the kernel session.
critic_input = Concatenate()([state_input, action_input])

hidden_units = 256

# Two fully connected ReLU layers over the concatenated state-action vector.
x = Dense(units=hidden_units, activation="relu")(critic_input)
x = Dense(units=hidden_units, activation="relu")(x)

# Linear output unit (no activation), so the value estimate is unbounded.
q_value = Dense(units=1)(x)

q_network = Model(inputs=[state_input, action_input], outputs=[q_value])

### Train REDQ Agent

In [4]:
# Environment: Hopper-v4, episodes truncated after 1000 steps.
# Alternative tried earlier (kept for reference, not run):
#   gym.make("Ant-v4", max_episode_steps=1000, healthy_z_range=(0.3, 0.9))
env = gym.make("Hopper-v4", max_episode_steps=1000)

# The agent is built from the two Keras models defined in the cells above.
redq_agent = AgentREDQ(policy_network, q_network, env)

# Training run. An earlier configuration (not run) used only:
#   steps_per_epoch=5000, epochs=10, learning_rate=1e-3, entropy_reg=1.0
# NOTE(review): the meaning of each hyperparameter is defined by
# AgentREDQ.train in agent_redq — confirm there before tuning.
redq_agent.train(
    steps_per_epoch=5000,
    epochs=10,
    update_after=1000,
    start_steps=1000,
    train_steps_per_env_step=1,
    ensemble_size=2,
    num_q_evals=1,
    learning_rate=1e-3,
    entropy_reg=0.2,
)

Setup Phase  -  Filling Replay Buffer


100%|██████████| 4000/4000 [00:03<00:00, 1029.33it/s]


cri

Training Phase  -  Training Model 2023-10-04-1346


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=50.8, policy_loss=-2.25]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=39.1, policy_loss=-2.59]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=30.5, policy_loss=-2.98]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=26.4, policy_loss=-3.34]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=25.8, policy_loss=-3.73]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=24.2, policy_loss=-3.99]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=21.2, policy_loss=-4.13]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=22.9, policy_loss=-4.27]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=20, policy_loss=-4.32]  

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=19.3, policy_loss=-4.37]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=18.1, policy_loss=-4.34]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=16.8, policy_loss=-4.28]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=14.8, policy_loss=-4.22]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=14.2, policy_loss=-4.12]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=14.6, policy_loss=-4.12]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=11.9, policy_loss=-3.99]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=13.9, policy_loss=-3.97]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=13.1, policy_loss=-3.93]

cri


Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s, critic_loss=11.9, policy_loss=-3.91]

cri
pol


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=12.4, policy_loss=-3.94]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=10.7, policy_loss=-4.07]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=10, policy_loss=-4.13]  

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=8.92, policy_loss=-4.17]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=8.95, policy_loss=-4.24]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=8.39, policy_loss=-4.29]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=7.8, policy_loss=-4.37] 

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=7.11, policy_loss=-4.42]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=7.58, policy_loss=-4.5] 

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=6.31, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=6.76, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=6.66, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=6.88, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=5.2, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=5.46, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=5.4, policy_loss=-4.5]  

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=5.26, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=4.43, policy_loss=-4.44]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=4.67, policy_loss=-4.44]

cri


Epoch 1/10:   0%|          | 1/5000 [00:01<1:53:45,  1.37s/it, critic_loss=4.27, policy_loss=-4.45]

cri
pol


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.58, policy_loss=-4.44]  

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.38, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=5.23, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.24, policy_loss=-4.41]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=5.1, policy_loss=-4.48] 

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.4, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.88, policy_loss=-4.44]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.21, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.98, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.65, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.46, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=5.04, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=6.54, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=5.13, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.45, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.68, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.86, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=4.05, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.97, policy_loss=-4.5] 

cri


Epoch 1/10:   0%|          | 2/5000 [00:01<56:46,  1.47it/s, critic_loss=3.59, policy_loss=-4.42]

cri
pol


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.93, policy_loss=-4.45]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.54, policy_loss=-4.45]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.09, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.11, policy_loss=-4.49]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.16, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.13, policy_loss=-4.51]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.49, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.03, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.61, policy_loss=-4.45]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.07, policy_loss=-4.44]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.27, policy_loss=-4.4] 

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.94, policy_loss=-4.43]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.95, policy_loss=-4.38]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.09, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.15, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.3, policy_loss=-4.53] 

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=4.48, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.89, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.91, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 3/5000 [00:01<38:43,  2.15it/s, critic_loss=3.9, policy_loss=-4.58] 

cri
pol


Epoch 1/10:   0%|          | 4/5000 [00:01<30:20,  2.74it/s, critic_loss=4.15, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 4/5000 [00:01<30:20,  2.74it/s, critic_loss=3.45, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.18, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.87, policy_loss=-4.45]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.46, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.75, policy_loss=-4.42]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.34, policy_loss=-4.42]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=4.71, policy_loss=-4.52]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.82, policy_loss=-4.47]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.35, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.66, policy_loss=-4.52]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.72, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.97, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=2.82, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.98, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.12, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=4.09, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.36, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.77, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 4/5000 [00:02<30:20,  2.74it/s, critic_loss=3.53, policy_loss=-4.49]

cri
pol


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.87, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.25, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=2.8, policy_loss=-4.48] 

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.2, policy_loss=-4.46]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.38, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.32, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.22, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.03, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.39, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.32, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.14, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.35, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=4.09, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.1, policy_loss=-4.55] 

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.36, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.02, policy_loss=-4.5] 

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=2.62, policy_loss=-4.44]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.02, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.58, policy_loss=-4.48]

cri


Epoch 1/10:   0%|          | 5/5000 [00:02<25:47,  3.23it/s, critic_loss=3.16, policy_loss=-4.49]

cri
pol


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=3, policy_loss=-4.52]   

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.65, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=3.03, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.99, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.65, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.7, policy_loss=-4.61] 

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=3.35, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.8, policy_loss=-4.55] 

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.62, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=3.13, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.69, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.34, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.85, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.5, policy_loss=-4.67] 

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.91, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.41, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.59, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.8, policy_loss=-4.58] 

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=3.11, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 6/5000 [00:02<23:21,  3.56it/s, critic_loss=2.43, policy_loss=-4.49]

cri
pol


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=3.24, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.82, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.28, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=3.16, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.67, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.81, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.42, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.37, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.6, policy_loss=-4.59] 

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.07, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.71, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.18, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.61, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.63, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.04, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.59, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.32, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.4, policy_loss=-4.59] 

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.14, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 7/5000 [00:02<21:29,  3.87it/s, critic_loss=2.83, policy_loss=-4.55]

cri
pol


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.31, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.52, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.45, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.72, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.47, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.41, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.36, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.63, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.6, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.74, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.07, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 8/5000 [00:02<20:42,  4.02it/s, critic_loss=2.31, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.67, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.28, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.28, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.67, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.8, policy_loss=-4.65] 

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.73, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.11, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 8/5000 [00:03<20:42,  4.02it/s, critic_loss=2.18, policy_loss=-4.58]

cri
pol


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.21, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.74, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.16, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.1, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=1.97, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.56, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.24, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.22, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.18, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=1.93, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.41, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.39, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.15, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.4, policy_loss=-4.63] 

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.22, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.17, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.07, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2.52, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=2, policy_loss=-4.57]   

cri


Epoch 1/10:   0%|          | 9/5000 [00:03<20:22,  4.08it/s, critic_loss=1.67, policy_loss=-4.57]

cri
pol


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.14, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.25, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.75, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.2, policy_loss=-4.65] 

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.95, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.97, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.04, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.06, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.76, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.83, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.09, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.74, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.98, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.69, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.78, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.77, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.25, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.65, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=2.33, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 10/5000 [00:03<19:46,  4.20it/s, critic_loss=1.9, policy_loss=-4.63] 

cri
pol


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.49, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.05, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.94, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.62, policy_loss=-4.6]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.61, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.76, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.85, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.57, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.87, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.76, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.91, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.89, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.9, policy_loss=-4.62] 

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.72, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.72, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.17, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.15, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=2.08, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.88, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 11/5000 [00:03<19:04,  4.36it/s, critic_loss=1.76, policy_loss=-4.66]

cri
pol


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.78, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.57, policy_loss=-4.6]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.75, policy_loss=-4.6]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=2, policy_loss=-4.59]  

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.84, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.75, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.75, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.86, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.93, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.67, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.73, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.49, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.78, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=2.32, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.71, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.99, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.68, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.8, policy_loss=-4.68] 

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.49, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 12/5000 [00:03<19:00,  4.37it/s, critic_loss=1.57, policy_loss=-4.62]

cri
pol


Epoch 1/10:   0%|          | 13/5000 [00:03<18:48,  4.42it/s, critic_loss=1.9, policy_loss=-4.6]  

cri


Epoch 1/10:   0%|          | 13/5000 [00:03<18:48,  4.42it/s, critic_loss=1.61, policy_loss=-4.6]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.78, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=2.39, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.81, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.78, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.92, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.86, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.67, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.58, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=2, policy_loss=-4.61]   

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.75, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=2.37, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.87, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.61, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.78, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.71, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=2.23, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=1.66, policy_loss=-4.52]

cri


Epoch 1/10:   0%|          | 13/5000 [00:04<18:48,  4.42it/s, critic_loss=2.2, policy_loss=-4.56] 

cri
pol


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.57, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.09, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.94, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.88, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.03, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.74, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.83, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.64, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.87, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.9, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.52, policy_loss=-4.53]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.71, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.82, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.13, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.64, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.73, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.33, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=1.73, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2.24, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 14/5000 [00:04<18:18,  4.54it/s, critic_loss=2, policy_loss=-4.59]   

cri
pol


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.63, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=2.02, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.68, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.78, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.76, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.82, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=2.07, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.72, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.54, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.55, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=2.11, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.87, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.82, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.74, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.8, policy_loss=-4.64] 

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.63, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.65, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.24, policy_loss=-4.51]

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=2, policy_loss=-4.55]   

cri


Epoch 1/10:   0%|          | 15/5000 [00:04<17:34,  4.73it/s, critic_loss=1.98, policy_loss=-4.59]

cri
pol


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.92, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.7, policy_loss=-4.71] 

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=2.2, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.88, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.54, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.52, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.66, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.61, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=2.03, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.57, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.69, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.68, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.88, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.5, policy_loss=-4.64] 

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.75, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.54, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.75, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=2.01, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=1.67, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 16/5000 [00:04<17:21,  4.79it/s, critic_loss=2.16, policy_loss=-4.71]

cri
pol


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.92, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.65, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.58, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.77, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.63, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.86, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.7, policy_loss=-4.62] 

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.72, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.72, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.47, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.65, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.37, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.69, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.99, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.88, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.52, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.93, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.69, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 17/5000 [00:04<17:13,  4.82it/s, critic_loss=1.36, policy_loss=-4.62]

cri
pol


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.4, policy_loss=-4.66] 

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.64, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.78, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.48, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.45, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.57, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.65, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.83, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.55, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.31, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.65, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.68, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.28, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.81, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.41, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 18/5000 [00:05<17:33,  4.73it/s, critic_loss=1.33, policy_loss=-4.69]

cri
pol


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.54, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.41, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.3, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.39, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.44, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.37, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.4, policy_loss=-4.62] 

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.67, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.43, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.5, policy_loss=-4.57] 

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.68, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.66, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.39, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.76, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.67, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.4, policy_loss=-4.65] 

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.61, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.74, policy_loss=-4.72]

cri


Epoch 1/10:   0%|          | 19/5000 [00:05<17:16,  4.81it/s, critic_loss=1.68, policy_loss=-4.71]

cri
pol


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.58, policy_loss=-4.73]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.75, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.61, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.99, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.87, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.77, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.47, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.57, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.73, policy_loss=-4.74]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=2.08, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.55, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.55, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.92, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.6, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.57, policy_loss=-4.72]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.42, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.98, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 20/5000 [00:05<17:19,  4.79it/s, critic_loss=1.69, policy_loss=-4.64]

cri
pol


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.68, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.79, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.61, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.5, policy_loss=-4.61] 

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.73, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.42, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.64, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.46, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.66, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.88, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.55, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.83, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.54, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.56, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.67, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.65, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.53, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.53, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.5, policy_loss=-4.64] 

cri


Epoch 1/10:   0%|          | 21/5000 [00:05<17:58,  4.62it/s, critic_loss=1.75, policy_loss=-4.63]

cri
pol


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.74, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.55, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=2.37, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.58, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.85, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.66, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.94, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.51, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.83, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.65, policy_loss=-4.54]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.86, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.86, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 22/5000 [00:05<18:00,  4.61it/s, critic_loss=1.76, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.67, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.73, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.71, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.77, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.52, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.87, policy_loss=-4.55]

cri


Epoch 1/10:   0%|          | 22/5000 [00:06<18:00,  4.61it/s, critic_loss=1.66, policy_loss=-4.54]

cri
pol


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.68, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.57, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.84, policy_loss=-4.68]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.54, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.49, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.31, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.54, policy_loss=-4.58]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.51, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.38, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.68, policy_loss=-4.6] 

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.76, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=2.39, policy_loss=-4.67]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.51, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.96, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=2.08, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.92, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.86, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.85, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=1.54, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 23/5000 [00:06<18:07,  4.58it/s, critic_loss=3.8, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.85, policy_loss=-4.7]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.31, policy_loss=-4.7]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.26, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.37, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.67, policy_loss=-4.56]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.66, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.88, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.88, policy_loss=-4.72]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.54, policy_loss=-4.74]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.26, policy_loss=-4.74]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.73, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.24, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.45, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.52, policy_loss=-4.57]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.25, policy_loss=-4.59]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.56, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.81, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 24/5000 [00:06<18:20,  4.52it/s, critic_loss=1.47, policy_loss=-4.71]

cri
pol


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.49, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.56, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.54, policy_loss=-4.61]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.64, policy_loss=-4.63]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.56, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.6, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.52, policy_loss=-4.66]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.38, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.72, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.25, policy_loss=-4.64]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.52, policy_loss=-4.62]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.64, policy_loss=-4.7] 

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.59, policy_loss=-4.71]

cri


Epoch 1/10:   0%|          | 25/5000 [00:06<18:30,  4.48it/s, critic_loss=1.51, policy_loss=-4.72]

cri
pol


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.57, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.59, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.89, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.18, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.54, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.77, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.45, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.27, policy_loss=-4.78]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.57, policy_loss=-4.76]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.62, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.22, policy_loss=-4.57]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.31, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.46, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.9, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.67, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.52, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 26/5000 [00:06<18:07,  4.57it/s, critic_loss=1.08, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 27/5000 [00:06<18:39,  4.44it/s, critic_loss=1.54, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.82, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.73, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.45, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.63, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.31, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.41, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.4, policy_loss=-4.59] 

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.5, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.6, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.5, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.65, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.56, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.58, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.67, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.78, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 27/5000 [00:07<18:39,  4.44it/s, critic_loss=1.45, policy_loss=-4.55]

cri
pol


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.54, policy_loss=-4.52]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.98, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.51, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.62, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.56, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.7, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.53, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.54, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.74, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.52, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.27, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.48, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.73, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.77, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.68, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.31, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.83, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=2.15, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 28/5000 [00:07<19:26,  4.26it/s, critic_loss=1.56, policy_loss=-4.61]

cri
pol


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.78, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.63, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.31, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.53, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.67, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.46, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.96, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.32, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.52, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.55, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.66, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.4, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.36, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.34, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 29/5000 [00:07<18:46,  4.41it/s, critic_loss=1.45, policy_loss=-4.61]

cri
pol


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.86, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.37, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.31, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.58, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.51, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.63, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.59, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.59, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.82, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.43, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.53, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.14, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.37, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.54, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.52, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 30/5000 [00:07<18:08,  4.57it/s, critic_loss=1.3, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.98, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.5, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.3, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.22, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.54, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.45, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.31, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.36, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.47, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.09, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 31/5000 [00:07<17:54,  4.62it/s, critic_loss=1.82, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.39, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.44, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.55, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.37, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.43, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 31/5000 [00:08<17:54,  4.62it/s, critic_loss=1.38, policy_loss=-4.62]

cri
pol


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.53, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.36, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.43, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.37, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.48, policy_loss=-4.57]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.38, policy_loss=-4.54]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.71, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.39, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.47, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=2.02, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.47, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.26, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.2, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.27, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|          | 32/5000 [00:08<18:01,  4.59it/s, critic_loss=1.38, policy_loss=-4.68]

cri
pol


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.34, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.36, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.69, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.48, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.7, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.44, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.6, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.57, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=0.987, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.33, policy_loss=-4.59] 

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.43, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.97, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.44, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 33/5000 [00:08<18:04,  4.58it/s, critic_loss=1.15, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.62, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.23, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.48, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.46, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.24, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.84, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.58, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.23, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.54, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=3.04, policy_loss=-4.75]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.43, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.47, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.49, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 34/5000 [00:08<18:02,  4.59it/s, critic_loss=1.57, policy_loss=-4.68]

cri
pol


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.78, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.62, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.44, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.69, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=2.39, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.25, policy_loss=-4.6]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.48, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.56, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.66, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.29, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.45, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.62, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.71, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.58, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.74, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.5, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.84, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.73, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 35/5000 [00:08<17:56,  4.61it/s, critic_loss=1.59, policy_loss=-4.69]

cri
pol


Epoch 1/10:   1%|          | 36/5000 [00:08<17:49,  4.64it/s, critic_loss=1.75, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 36/5000 [00:08<17:49,  4.64it/s, critic_loss=1.52, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 36/5000 [00:08<17:49,  4.64it/s, critic_loss=1.9, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 36/5000 [00:08<17:49,  4.64it/s, critic_loss=1.88, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 36/5000 [00:08<17:49,  4.64it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.15, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.76, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.49, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.48, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.44, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.57, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.61, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.61, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.4, policy_loss=-4.6]  

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.59, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.3, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=0.981, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.23, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|          | 36/5000 [00:09<17:49,  4.64it/s, critic_loss=1.54, policy_loss=-4.64]

cri
pol


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.41, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.42, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.13, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.43, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.46, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.03, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.76, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=0.993, policy_loss=-4.6]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.49, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.28, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.52, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.4, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.65, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 37/5000 [00:09<17:49,  4.64it/s, critic_loss=1.24, policy_loss=-4.73]

cri
pol


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.61, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.27, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=0.792, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=0.824, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.23, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.1, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.23, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.41, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.19, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=0.992, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.08, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.06, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.31, policy_loss=-4.56]

cri


Epoch 1/10:   1%|          | 38/5000 [00:09<17:17,  4.78it/s, critic_loss=1.71, policy_loss=-4.59]

cri
pol


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.998, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.94, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.782, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.42, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.806, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.19, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.943, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.813, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.05, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.55, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.814, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=0.784, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 39/5000 [00:09<17:56,  4.61it/s, critic_loss=1.36, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.979, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.946, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.869, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.3, policy_loss=-4.67]  

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.841, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1, policy_loss=-4.7]    

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.932, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.11, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.815, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.18, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.873, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=0.879, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 40/5000 [00:09<17:59,  4.60it/s, critic_loss=1.51, policy_loss=-4.73] 

cri


Epoch 1/10:   1%|          | 40/5000 [00:10<17:59,  4.60it/s, critic_loss=0.958, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 40/5000 [00:10<17:59,  4.60it/s, critic_loss=0.938, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.972, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.32, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.87, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.91, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.953, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.28, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.883, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.687, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.824, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.38, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=0.897, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.17, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.6, policy_loss=-4.76] 

cri


Epoch 1/10:   1%|          | 41/5000 [00:10<17:46,  4.65it/s, critic_loss=1.02, policy_loss=-4.69]

cri
pol


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.12, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.4, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.983, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.929, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.864, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.14, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.845, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.997, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.25, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.994, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.1, policy_loss=-4.65]  

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.92, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1, policy_loss=-4.69]   

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.35, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.827, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1, policy_loss=-4.61]    

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=1.09, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 42/5000 [00:10<17:43,  4.66it/s, critic_loss=0.832, policy_loss=-4.63]

cri
pol


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.32, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.42, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.04, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=0.98, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.23, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.41, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.64, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.45, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.15, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 43/5000 [00:10<17:45,  4.65it/s, critic_loss=1.05, policy_loss=-4.72]

cri
pol


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.13, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.28, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.3, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.01, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.32, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=0.957, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.52, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.59, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.5, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.42, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.47, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 44/5000 [00:10<17:42,  4.67it/s, critic_loss=1.18, policy_loss=-4.61]

cri
pol


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.95, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.31, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.67, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.76, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.92, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.27, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 45/5000 [00:10<17:23,  4.75it/s, critic_loss=1.4, policy_loss=-4.73] 

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.39, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.37, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.46, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.46, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.4, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 45/5000 [00:11<17:23,  4.75it/s, critic_loss=1.14, policy_loss=-4.63]

cri
pol


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.2, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.36, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.5, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.71, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.53, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.62, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.5, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.69, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.45, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.36, policy_loss=-4.75]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.3, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.53, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.55, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.58, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.6, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.94, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 46/5000 [00:11<18:08,  4.55it/s, critic_loss=1.53, policy_loss=-4.67]

cri
pol


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.47, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.38, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.46, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.63, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.54, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.39, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.72, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.67, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.74, policy_loss=-4.77]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.75, policy_loss=-4.79]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.54, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.74, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=2.07, policy_loss=-4.55]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.43, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.61, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.94, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.48, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 47/5000 [00:11<18:03,  4.57it/s, critic_loss=1.44, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.6, policy_loss=-4.6]  

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.23, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.58, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.46, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.57, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.69, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.45, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.64, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.57, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.52, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.85, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.46, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.64, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.31, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.81, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.42, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.42, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 48/5000 [00:11<18:37,  4.43it/s, critic_loss=1.69, policy_loss=-4.64]

cri
pol


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.4, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.3, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.55, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.82, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.15, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 49/5000 [00:11<19:43,  4.18it/s, critic_loss=1.55, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 49/5000 [00:12<19:43,  4.18it/s, critic_loss=1.38, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 49/5000 [00:12<19:43,  4.18it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 49/5000 [00:12<19:43,  4.18it/s, critic_loss=1.39, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 49/5000 [00:12<19:43,  4.18it/s, critic_loss=1.47, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 49/5000 [00:12<19:43,  4.18it/s, critic_loss=1.6, policy_loss=-4.62] 

cri
pol


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.36, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.36, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.19, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1, policy_loss=-4.62]   

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.53, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.26, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.41, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.52, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.19, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.45, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.44, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.21, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.3, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.49, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.79, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.13, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 50/5000 [00:12<19:35,  4.21it/s, critic_loss=1.28, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=2.49, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.56, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.02, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.71, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.53, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.67, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.41, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.43, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.43, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.35, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.43, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 51/5000 [00:12<19:05,  4.32it/s, critic_loss=1.26, policy_loss=-4.59]

cri
pol


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.61, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=0.931, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.18, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.19, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.19, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.34, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.84, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.36, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=0.994, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.38, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=0.984, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 52/5000 [00:12<18:24,  4.48it/s, critic_loss=1.23, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.52, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.88, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.48, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=0.974, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.08, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.55, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.22, policy_loss=-4.6]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.26, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 53/5000 [00:12<18:04,  4.56it/s, critic_loss=1.29, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.29, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.62, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.17, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.19, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=0.929, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.34, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.22, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 54/5000 [00:12<18:05,  4.56it/s, critic_loss=1.78, policy_loss=-4.8]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.2, policy_loss=-4.76]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.29, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=0.946, policy_loss=-4.57]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.16, policy_loss=-4.57] 

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.23, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.38, policy_loss=-4.75]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.41, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.15, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=0.854, policy_loss=-4.57]

cri


Epoch 1/10:   1%|          | 54/5000 [00:13<18:05,  4.56it/s, critic_loss=1.12, policy_loss=-4.53] 

cri
pol


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.68, policy_loss=-4.54]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.46, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.3, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=0.949, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.24, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.08, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.24, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.11, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.04, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.03, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.01, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=0.973, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.38, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=0.961, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 55/5000 [00:13<17:45,  4.64it/s, critic_loss=1.15, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.97, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.875, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.975, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.864, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.915, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.11, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.28, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.778, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.715, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.961, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.07, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.08, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.12, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.32, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 56/5000 [00:13<17:44,  4.64it/s, critic_loss=0.9, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.17, policy_loss=-4.75]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.953, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.898, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.791, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.771, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.21, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.972, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.99, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.831, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.25, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.25, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.82, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.951, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1, policy_loss=-4.71]    

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.971, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.72, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.828, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 57/5000 [00:13<17:32,  4.70it/s, critic_loss=0.81, policy_loss=-4.6]  

cri
pol


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1, policy_loss=-4.6]   

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.936, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.833, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.12, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.46, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.952, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.12, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.863, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.787, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.881, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.08, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=0.913, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 58/5000 [00:13<17:45,  4.64it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|          | 58/5000 [00:14<17:45,  4.64it/s, critic_loss=0.792, policy_loss=-4.7]

cri


Epoch 1/10:   1%|          | 58/5000 [00:14<17:45,  4.64it/s, critic_loss=1.3, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.826, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.01, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.3, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.858, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.11, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.29, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.2, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.51, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.794, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.75, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.989, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.894, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=1.03, policy_loss=-4.58] 

cri


Epoch 1/10:   1%|          | 59/5000 [00:14<18:46,  4.39it/s, critic_loss=0.941, policy_loss=-4.62]

cri
pol


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.03, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.42, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.26, policy_loss=-4.74]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=0.935, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.4, policy_loss=-4.58]  

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.24, policy_loss=-4.55]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.18, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.54, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.31, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.03, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.28, policy_loss=-4.73]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.34, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.44, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.21, policy_loss=-4.58]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.59, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.17, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.68, policy_loss=-4.71]

cri


Epoch 1/10:   1%|          | 60/5000 [00:14<20:14,  4.07it/s, critic_loss=1.81, policy_loss=-4.65]

cri
pol


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.48, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.3, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.36, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.36, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.43, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.34, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.29, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.47, policy_loss=-4.62]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.37, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.36, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.09, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.26, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.29, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 61/5000 [00:14<20:02,  4.11it/s, critic_loss=1.53, policy_loss=-4.66]

cri
pol


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.48, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.46, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.35, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.29, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.29, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.16, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 62/5000 [00:14<20:22,  4.04it/s, critic_loss=1.09, policy_loss=-4.61]

cri


Epoch 1/10:   1%|          | 62/5000 [00:15<20:22,  4.04it/s, critic_loss=1.28, policy_loss=-4.64]

cri


Epoch 1/10:   1%|          | 62/5000 [00:15<20:22,  4.04it/s, critic_loss=1.08, policy_loss=-4.64]

cri
pol


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.13, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.3, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.2, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.1, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.3, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.52, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=0.933, policy_loss=-4.7]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1, policy_loss=-4.69]   

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.45, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.15, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.11, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=0.877, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.47, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 63/5000 [00:15<19:32,  4.21it/s, critic_loss=1.01, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.23, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.876, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.979, policy_loss=-4.73]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.14, policy_loss=-4.71] 

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.87, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.17, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.767, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.954, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.34, policy_loss=-4.74] 

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.884, policy_loss=-4.73]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.989, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.07, policy_loss=-4.67] 

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.895, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   1%|▏         | 64/5000 [00:15<19:28,  4.22it/s, critic_loss=0.948, policy_loss=-4.64]

cri
pol


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.38, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.862, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.28, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.923, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.898, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.62, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.913, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.961, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=0.943, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.26, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.35, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 65/5000 [00:15<19:33,  4.21it/s, critic_loss=1.07, policy_loss=-4.69]

cri
pol


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=0.857, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.47, policy_loss=-4.61] 

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.46, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.36, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=0.952, policy_loss=-4.7]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=0.94, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.19, policy_loss=-4.58]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.39, policy_loss=-4.57]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.58, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=0.963, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.06, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|▏         | 66/5000 [00:15<20:04,  4.10it/s, critic_loss=1.01, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.1, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.13, policy_loss=-4.75]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.35, policy_loss=-4.74]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=0.863, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1, policy_loss=-4.62]    

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.1, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.39, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.03, policy_loss=-4.76]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.29, policy_loss=-4.73]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.1, policy_loss=-4.74] 

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=1.18, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 67/5000 [00:16<19:53,  4.13it/s, critic_loss=0.9, policy_loss=-4.6]  

cri
pol


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.1, policy_loss=-4.6]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=0.964, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.1, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.31, policy_loss=-4.7]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.1, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.35, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.05, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=0.992, policy_loss=-4.6]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.13, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.22, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 68/5000 [00:16<19:27,  4.23it/s, critic_loss=1.46, policy_loss=-4.67]

cri
pol


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.33, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.61, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.42, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.4, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.46, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.45, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.62, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=0.898, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.27, policy_loss=-4.68] 

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.75, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 69/5000 [00:16<18:54,  4.34it/s, critic_loss=1.15, policy_loss=-4.68]

cri
pol


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.43, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.36, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.59, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.83, policy_loss=-4.74]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.61, policy_loss=-4.74]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.65, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.52, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.71, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.24, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.86, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.91, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.5, policy_loss=-4.62] 

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.44, policy_loss=-4.57]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.7, policy_loss=-4.58] 

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.86, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 70/5000 [00:16<18:16,  4.50it/s, critic_loss=1.27, policy_loss=-4.74]

cri
pol


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.68, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.51, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.53, policy_loss=-4.55]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=2.02, policy_loss=-4.53]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.79, policy_loss=-4.55]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.8, policy_loss=-4.59] 

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.43, policy_loss=-4.7]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.84, policy_loss=-4.74]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.79, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.89, policy_loss=-4.59]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.35, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:16<17:51,  4.60it/s, critic_loss=1.53, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.55, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.49, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 71/5000 [00:17<17:51,  4.60it/s, critic_loss=1.58, policy_loss=-4.68]

cri
pol


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.78, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.75, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.35, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.35, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.54, policy_loss=-4.56]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.62, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.77, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.64, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.66, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.58, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.5, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|▏         | 72/5000 [00:17<18:07,  4.53it/s, critic_loss=1.18, policy_loss=-4.62]

cri
pol


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.58, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=2.44, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.46, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.61, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.37, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.63, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.27, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.36, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.61, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.7, policy_loss=-4.64] 

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.7, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.75, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.42, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.51, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.5, policy_loss=-4.72] 

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.49, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 73/5000 [00:17<18:37,  4.41it/s, critic_loss=1.28, policy_loss=-4.64]

cri
pol


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.26, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.35, policy_loss=-4.62]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.43, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.52, policy_loss=-4.66]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=0.985, policy_loss=-4.65]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.54, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.3, policy_loss=-4.63] 

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.62, policy_loss=-4.64]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.33, policy_loss=-4.63]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.74, policy_loss=-4.68]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.3, policy_loss=-4.7]  

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.38, policy_loss=-4.71]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.65, policy_loss=-4.73]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.5, policy_loss=-4.65] 

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.18, policy_loss=-4.61]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.63, policy_loss=-4.6] 

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   1%|▏         | 74/5000 [00:17<18:00,  4.56it/s, critic_loss=1.39, policy_loss=-4.73]

cri
pol


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.52, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.54, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.56, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.34, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.39, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.58, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.85, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.36, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.67, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.88, policy_loss=-4.54]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.17, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.61, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.39, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.48, policy_loss=-4.79]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.5, policy_loss=-4.75] 

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.47, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 75/5000 [00:17<18:04,  4.54it/s, critic_loss=1.54, policy_loss=-4.55]

cri
pol


Epoch 1/10:   2%|▏         | 76/5000 [00:17<17:43,  4.63it/s, critic_loss=1.33, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:17<17:43,  4.63it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:17<17:43,  4.63it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:17<17:43,  4.63it/s, critic_loss=1.52, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:17<17:43,  4.63it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.63, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.69, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.39, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.32, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.47, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.34, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.4, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.24, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.47, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.19, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 76/5000 [00:18<17:43,  4.63it/s, critic_loss=1.17, policy_loss=-4.72]

cri
pol


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=0.983, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.13, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.7, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.98, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=0.807, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.18, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.81, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.46, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.11, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.24, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.2, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=0.882, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.06, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 77/5000 [00:18<17:34,  4.67it/s, critic_loss=0.956, policy_loss=-4.64]

cri
pol


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.28, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.3, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.34, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.29, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.47, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.39, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.53, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.26, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.33, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.38, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.4, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 78/5000 [00:18<17:29,  4.69it/s, critic_loss=1.19, policy_loss=-4.7]

cri
pol


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.19, policy_loss=-4.78]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.58, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.28, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=0.972, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.12, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.2, policy_loss=-4.59] 

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.27, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.4, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 79/5000 [00:18<17:18,  4.74it/s, critic_loss=1.18, policy_loss=-4.68]

cri
pol


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=0.869, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.61, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.49, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.09, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.18, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.25, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.5, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.26, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=0.918, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.09, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.46, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.13, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.54, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 80/5000 [00:18<17:11,  4.77it/s, critic_loss=1.05, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.24, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.36, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.34, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.21, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.38, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.09, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.17, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.72, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.45, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.68, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.33, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.8, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.55, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.27, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.21, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=0.98, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 81/5000 [00:19<17:30,  4.68it/s, critic_loss=1, policy_loss=-4.66]   

cri
pol


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.43, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.62, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.34, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=0.963, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.32, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.39, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.98, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=0.98, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.2, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.34, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.26, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 82/5000 [00:19<17:19,  4.73it/s, critic_loss=1.27, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.37, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.68, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.22, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.36, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.43, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.43, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.51, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.35, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.53, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.4, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.61, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.53, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.51, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.19, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 83/5000 [00:19<17:29,  4.69it/s, critic_loss=1.59, policy_loss=-4.66]

cri
pol


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=2.88, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.75, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.54, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.59, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.37, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.73, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.4, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=0.949, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.48, policy_loss=-4.57] 

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.8, policy_loss=-4.6]  

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.39, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.41, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.78, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.44, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.49, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 84/5000 [00:19<17:15,  4.75it/s, critic_loss=1.32, policy_loss=-4.58]

cri
pol


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.6, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.66, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.77, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.63, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.52, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.75, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.65, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.66, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=2.02, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:19<17:14,  4.75it/s, critic_loss=1.46, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.47, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.62, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.71, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.51, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.63, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.64, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.79, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 85/5000 [00:20<17:14,  4.75it/s, critic_loss=1.67, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.44, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.91, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.43, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.72, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.76, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.47, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.78, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.44, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.9, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.78, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=2.06, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.32, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.94, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.63, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.69, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.82, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.4, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.7, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.57, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 86/5000 [00:20<18:36,  4.40it/s, critic_loss=1.43, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.6, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=2.08, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.56, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.73, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.58, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.55, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.57, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.69, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.53, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.77, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.83, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.62, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.78, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.8, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.53, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.66, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.47, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.6, policy_loss=-4.59] 

cri


Epoch 1/10:   2%|▏         | 87/5000 [00:20<19:02,  4.30it/s, critic_loss=1.44, policy_loss=-4.58]

cri
pol


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.65, policy_loss=-4.52]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.75, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.71, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.62, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.59, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.52, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.5, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.56, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.64, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.94, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.92, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.48, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.52, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.56, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.92, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.91, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 88/5000 [00:20<19:14,  4.25it/s, critic_loss=1.5, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.37, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.21, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.72, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.33, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.71, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.5, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.3, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.11, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.04, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.53, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1, policy_loss=-4.63]   

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:20<19:01,  4.30it/s, critic_loss=1.56, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:21<19:01,  4.30it/s, critic_loss=1.54, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:21<19:01,  4.30it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:21<19:01,  4.30it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 89/5000 [00:21<19:01,  4.30it/s, critic_loss=1.9, policy_loss=-4.62] 

cri
pol


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.12, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1, policy_loss=-4.62]  

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.3, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.61, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.48, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=0.987, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.4, policy_loss=-4.67]  

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.83, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.66, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.7, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.49, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.56, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 90/5000 [00:21<19:06,  4.28it/s, critic_loss=1.31, policy_loss=-4.56]

cri
pol


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.71, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.5, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.33, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.41, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.31, policy_loss=-4.81]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.25, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.11, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.08, policy_loss=-4.51]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.4, policy_loss=-4.54] 

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.24, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.36, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.58, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.23, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.23, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.23, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.2, policy_loss=-4.57] 

cri


Epoch 1/10:   2%|▏         | 91/5000 [00:21<19:24,  4.21it/s, critic_loss=1.77, policy_loss=-4.68]

cri
pol


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.14, policy_loss=-4.81]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.85, policy_loss=-4.83]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=0.955, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.06, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.8, policy_loss=-4.58] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.27, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.6, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=0.886, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.54, policy_loss=-4.78] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.16, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.33, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.11, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.45, policy_loss=-4.53]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=0.993, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=0.933, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.14, policy_loss=-4.74] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.33, policy_loss=-4.8] 

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.15, policy_loss=-4.79]

cri


Epoch 1/10:   2%|▏         | 92/5000 [00:21<19:09,  4.27it/s, critic_loss=1.45, policy_loss=-4.77]

cri
pol


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.18, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=0.917, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=0.806, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.03, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.3, policy_loss=-4.77] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.62, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=0.948, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.31, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.52, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=0.997, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.14, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   2%|▏         | 93/5000 [00:21<18:14,  4.48it/s, critic_loss=1.28, policy_loss=-4.7]

cri
pol


Epoch 1/10:   2%|▏         | 94/5000 [00:21<18:15,  4.48it/s, critic_loss=0.787, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:21<18:15,  4.48it/s, critic_loss=0.864, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:21<18:15,  4.48it/s, critic_loss=0.9, policy_loss=-4.71]  

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:21<18:15,  4.48it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:21<18:15,  4.48it/s, critic_loss=0.821, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.966, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.811, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.44, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.925, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.902, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.27, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.815, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.806, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=1.09, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.915, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 94/5000 [00:22<18:15,  4.48it/s, critic_loss=0.932, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.89, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.04, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.908, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.95, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.663, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.03, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.991, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1, policy_loss=-4.7]    

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.879, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.875, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.35, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.883, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.04, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=0.969, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 95/5000 [00:22<18:01,  4.53it/s, critic_loss=1.09, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.939, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.911, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.916, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.901, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.24, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.991, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.15, policy_loss=-4.75] 

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.48, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.919, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.959, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.977, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 96/5000 [00:22<17:56,  4.56it/s, critic_loss=0.775, policy_loss=-4.64]

cri
pol


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.06, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.73, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.94, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.982, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.22, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.849, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.906, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.836, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.75, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.908, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.96, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.912, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.959, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 97/5000 [00:22<17:36,  4.64it/s, critic_loss=0.946, policy_loss=-4.65]

cri
pol


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=0.93, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=0.987, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.02, policy_loss=-4.59] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1, policy_loss=-4.7]    

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.21, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.35, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.31, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.31, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.06, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.31, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=0.936, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:22<18:24,  4.44it/s, critic_loss=1.38, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:23<18:24,  4.44it/s, critic_loss=1.7, policy_loss=-4.73] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:23<18:24,  4.44it/s, critic_loss=1.18, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:23<18:24,  4.44it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:23<18:24,  4.44it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 98/5000 [00:23<18:24,  4.44it/s, critic_loss=1.1, policy_loss=-4.61] 

cri
pol


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.08, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.03, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.36, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.971, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.12, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.982, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.98, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.943, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.962, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.2, policy_loss=-4.65]  

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.05, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.956, policy_loss=-4.78]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=0.903, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.21, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.04, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 99/5000 [00:23<18:25,  4.43it/s, critic_loss=1.35, policy_loss=-4.59]

cri
pol


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=0.955, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.5, policy_loss=-4.67]  

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.41, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.34, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.18, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=0.993, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.05, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=0.918, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=0.994, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=0.92, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1, policy_loss=-4.69]   

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.2, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.12, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 100/5000 [00:23<18:41,  4.37it/s, critic_loss=1.14, policy_loss=-4.67]

cri
pol


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.903, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.969, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.39, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.19, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.971, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.2, policy_loss=-4.63]  

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.39, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.34, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.15, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.963, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.16, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.95, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.998, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=0.95, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 101/5000 [00:23<18:12,  4.48it/s, critic_loss=1.13, policy_loss=-4.66]

cri
pol


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.03, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=0.898, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.08, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=0.841, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.11, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.31, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=0.93, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.28, policy_loss=-4.78]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.28, policy_loss=-4.82]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=1.06, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=0.966, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 102/5000 [00:23<17:51,  4.57it/s, critic_loss=0.865, policy_loss=-4.63]

cri
pol


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.06, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.12, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=0.977, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.11, policy_loss=-4.7]  

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.06, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:23<17:38,  4.63it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.57, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=0.892, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=0.961, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.07, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=0.968, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.43, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 103/5000 [00:24<17:38,  4.63it/s, critic_loss=1.07, policy_loss=-4.69]

cri
pol


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.01, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.48, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=0.8, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=0.975, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=0.882, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.24, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.13, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.33, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=0.902, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=0.882, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 104/5000 [00:24<17:33,  4.65it/s, critic_loss=1.11, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.19, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.45, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.52, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.08, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.6, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.13, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=0.997, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.18, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.31, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.3, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 105/5000 [00:24<17:18,  4.71it/s, critic_loss=1.34, policy_loss=-4.66]

cri
pol


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.33, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.48, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.17, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.4, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.46, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.29, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.43, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.36, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.44, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.49, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 106/5000 [00:24<16:45,  4.87it/s, critic_loss=1.43, policy_loss=-4.64]

cri
pol


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.65, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.81, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.65, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.67, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.81, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.57, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.45, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.43, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.71, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.41, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.4, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.7, policy_loss=-4.7]  

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.96, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.55, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 107/5000 [00:24<16:56,  4.81it/s, critic_loss=1.48, policy_loss=-4.63]

cri
pol


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.46, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.41, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.64, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.51, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.62, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:24<16:45,  4.87it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.68, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.3, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.35, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.65, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.21, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.64, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.4, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.15, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.48, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 108/5000 [00:25<16:45,  4.87it/s, critic_loss=1.85, policy_loss=-4.68]

cri
pol


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.61, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.38, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.31, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.66, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.59, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.62, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.22, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.79, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.22, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.24, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.23, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.62, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.35, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.41, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.29, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.35, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.66, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 109/5000 [00:25<16:41,  4.88it/s, critic_loss=1.27, policy_loss=-4.53]

cri
pol


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.71, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.62, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.32, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.39, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=2, policy_loss=-4.66]   

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.3, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.03, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.45, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.34, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.69, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.3, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.43, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.46, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.07, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 110/5000 [00:25<17:05,  4.77it/s, critic_loss=1.4, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.47, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.02, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=0.975, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.32, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.77, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.29, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.1, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.49, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.25, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 111/5000 [00:25<16:46,  4.86it/s, critic_loss=1.36, policy_loss=-4.68]

cri
pol


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.17, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.03, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=0.979, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.06, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.31, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=0.935, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.16, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.13, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:25<17:27,  4.67it/s, critic_loss=1.67, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:26<17:27,  4.67it/s, critic_loss=1.7, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:26<17:27,  4.67it/s, critic_loss=1.32, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:26<17:27,  4.67it/s, critic_loss=1.06, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:26<17:27,  4.67it/s, critic_loss=1.3, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 112/5000 [00:26<17:27,  4.67it/s, critic_loss=0.977, policy_loss=-4.68]

cri
pol


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.25, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.2, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=0.998, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.42, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.41, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.21, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.37, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.45, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.19, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.22, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.2, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.45, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.29, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.36, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.24, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1, policy_loss=-4.7]   

cri


Epoch 1/10:   2%|▏         | 113/5000 [00:26<18:30,  4.40it/s, critic_loss=1.01, policy_loss=-4.62]

cri
pol


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.35, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.47, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.26, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.44, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.21, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.18, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.09, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.32, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.87, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.09, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=0.989, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.13, policy_loss=-4.65] 

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.6, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.15, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 114/5000 [00:26<18:09,  4.49it/s, critic_loss=1.35, policy_loss=-4.67]

cri
pol


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=0.997, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.31, policy_loss=-4.7]  

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.63, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.34, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.57, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.36, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.35, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.18, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.61, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.18, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.34, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.57, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.32, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 115/5000 [00:26<17:33,  4.63it/s, critic_loss=1.32, policy_loss=-4.6] 

cri
pol


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.28, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.4, policy_loss=-4.6]  

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.41, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.55, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.32, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.4, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.46, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.73, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.58, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.46, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.46, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.92, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.51, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.26, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.53, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.76, policy_loss=-4.53]

cri


Epoch 1/10:   2%|▏         | 116/5000 [00:26<17:25,  4.67it/s, critic_loss=1.5, policy_loss=-4.52] 

cri
pol


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.52, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.79, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.8, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.59, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.39, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.37, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.56, policy_loss=-4.53]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=1.51, policy_loss=-4.54]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:26<17:04,  4.77it/s, critic_loss=2.27, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.7, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.56, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.25, policy_loss=-4.73]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.67, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.52, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.27, policy_loss=-4.55]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.6, policy_loss=-4.59] 

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.55, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.37, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 117/5000 [00:27<17:04,  4.77it/s, critic_loss=1.72, policy_loss=-4.77]

cri
pol


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.73, policy_loss=-4.77]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.57, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.91, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.99, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.88, policy_loss=-4.56]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.38, policy_loss=-4.51]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.95, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.98, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.89, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.61, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.81, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.61, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.66, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.43, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.86, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.57, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.75, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.45, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 118/5000 [00:27<18:09,  4.48it/s, critic_loss=1.81, policy_loss=-4.66]

cri
pol


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.53, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.83, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.27, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.42, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.54, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.99, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.64, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.57, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.53, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.53, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.53, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.66, policy_loss=-4.58]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.75, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.69, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.47, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.45, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=2.09, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 119/5000 [00:27<17:49,  4.56it/s, critic_loss=1.59, policy_loss=-4.64]

cri
pol


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.87, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.25, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.36, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.43, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.78, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.37, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.51, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.57, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.49, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.5, policy_loss=-4.62] 

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.3, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.43, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.34, policy_loss=-4.6]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.3, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.64, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.81, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.54, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 120/5000 [00:27<17:31,  4.64it/s, critic_loss=1.47, policy_loss=-4.69]

cri
pol


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.4, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.7, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.62, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.26, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.61, policy_loss=-4.76]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.59, policy_loss=-4.74]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.31, policy_loss=-4.57]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.39, policy_loss=-4.54]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.97, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.84, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.58, policy_loss=-4.78]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.66, policy_loss=-4.78]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.85, policy_loss=-4.75]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.63, policy_loss=-4.71]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.45, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.3, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 121/5000 [00:27<17:51,  4.55it/s, critic_loss=1.08, policy_loss=-4.61]

cri
pol


Epoch 1/10:   2%|▏         | 122/5000 [00:27<17:40,  4.60it/s, critic_loss=1.45, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.32, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.21, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.36, policy_loss=-4.61]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.24, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.3, policy_loss=-4.68] 

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.06, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=2, policy_loss=-4.77]   

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.06, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.24, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.04, policy_loss=-4.59]

cri


Epoch 1/10:   2%|▏         | 122/5000 [00:28<17:40,  4.60it/s, critic_loss=1.07, policy_loss=-4.62]

cri
pol


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.54, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=0.975, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.38, policy_loss=-4.72] 

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.47, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=0.882, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.15, policy_loss=-4.66] 

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.36, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=1.54, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 123/5000 [00:28<17:42,  4.59it/s, critic_loss=0.914, policy_loss=-4.67]

cri
pol


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.34, policy_loss=-4.71] 

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.23, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.851, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.902, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.13, policy_loss=-4.7]  

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.959, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.13, policy_loss=-4.61] 

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.926, policy_loss=-4.62]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.971, policy_loss=-4.6] 

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.07, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=0.797, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▏         | 124/5000 [00:28<17:17,  4.70it/s, critic_loss=1.21, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.13, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.41, policy_loss=-4.63]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.888, policy_loss=-4.68]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.973, policy_loss=-4.65]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.881, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.07, policy_loss=-4.63] 

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.901, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.07, policy_loss=-4.64] 

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.97, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.944, policy_loss=-4.64]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.867, policy_loss=-4.67]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.955, policy_loss=-4.7]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.778, policy_loss=-4.66]

cri


Epoch 1/10:   2%|▎         | 125/5000 [00:28<17:07,  4.74it/s, critic_loss=0.717, policy_loss=-4.68]

cri
pol


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.995, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.87, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.943, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.23, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.968, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.19, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.76, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.912, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.854, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=0.914, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:28<17:20,  4.68it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:29<17:20,  4.68it/s, critic_loss=1.65, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:29<17:20,  4.68it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:29<17:20,  4.68it/s, critic_loss=0.983, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:29<17:20,  4.68it/s, critic_loss=0.793, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 126/5000 [00:29<17:20,  4.68it/s, critic_loss=0.872, policy_loss=-4.67]

cri
pol


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=0.825, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.09, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=0.98, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.32, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=0.814, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.04, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=0.879, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.17, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=0.872, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 127/5000 [00:29<17:44,  4.58it/s, critic_loss=1.17, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.993, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.8, policy_loss=-4.65]  

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.853, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.783, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.37, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.869, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.835, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.852, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.935, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.929, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.975, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.96, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.24, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.03, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=0.985, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 128/5000 [00:29<17:41,  4.59it/s, critic_loss=1.07, policy_loss=-4.68]

cri
pol


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.992, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.21, policy_loss=-4.61] 

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.19, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.965, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.22, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.892, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.901, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.89, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.07, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.53, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.916, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.756, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.1, policy_loss=-4.75]  

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.918, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.975, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.794, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 129/5000 [00:29<17:09,  4.73it/s, critic_loss=0.96, policy_loss=-4.59] 

cri
pol


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.817, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.976, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.01, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.971, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.86, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.841, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.09, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.83, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.903, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.948, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.18, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.18, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=0.819, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 130/5000 [00:29<16:56,  4.79it/s, critic_loss=1.53, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.15, policy_loss=-4.77]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=0.996, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=0.716, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.15, policy_loss=-4.62] 

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.03, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.37, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.34, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:29<16:46,  4.84it/s, critic_loss=1.05, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=0.771, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=0.977, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.52, policy_loss=-4.59] 

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=0.899, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.23, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 131/5000 [00:30<16:46,  4.84it/s, critic_loss=1.12, policy_loss=-4.73]

cri
pol


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.884, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.04, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1, policy_loss=-4.6]    

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.49, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.983, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.991, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.96, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.02, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.07, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.85, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.31, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.11, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.2, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=0.965, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 132/5000 [00:30<16:55,  4.79it/s, critic_loss=1.19, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.24, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.42, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.9, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.39, policy_loss=-4.77]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.11, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.04, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.33, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=0.967, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.32, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.21, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=0.963, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.21, policy_loss=-4.61] 

cri


Epoch 1/10:   3%|▎         | 133/5000 [00:30<16:45,  4.84it/s, critic_loss=1.65, policy_loss=-4.66]

cri
pol


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.918, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.02, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.03, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.995, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.09, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=2.83, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.734, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.975, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.859, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.99, policy_loss=-4.72] 

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.948, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.896, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.882, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.894, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 134/5000 [00:30<17:00,  4.77it/s, critic_loss=0.9, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.918, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.835, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.1, policy_loss=-4.69]  

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.947, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.948, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.25, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.888, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.989, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.35, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.86, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.711, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.18, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.955, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.958, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.82, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.1, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=0.992, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 135/5000 [00:30<16:39,  4.87it/s, critic_loss=1.12, policy_loss=-4.64] 

cri
pol


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.825, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.927, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=1.25, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.972, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.855, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.874, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.958, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:30<16:29,  4.91it/s, critic_loss=0.97, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=0.762, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=0.884, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1, policy_loss=-4.7]     

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.07, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=0.997, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=0.864, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 136/5000 [00:31<16:29,  4.91it/s, critic_loss=1.23, policy_loss=-4.62]

cri
pol


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.27, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.42, policy_loss=-4.78]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.15, policy_loss=-4.77]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.14, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.01, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.04, policy_loss=-4.52]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.42, policy_loss=-4.54]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.24, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=0.976, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.21, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.15, policy_loss=-4.78]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.24, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.29, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=0.908, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=0.917, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 137/5000 [00:31<16:34,  4.89it/s, critic_loss=1.15, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.1, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.07, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.35, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.4, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.22, policy_loss=-4.6]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.58, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.5, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=0.991, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.47, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.1, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=0.926, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.54, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=0.899, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 138/5000 [00:31<16:43,  4.84it/s, critic_loss=1.12, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.52, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=0.895, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.48, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=0.976, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.08, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.05, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.12, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.49, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=0.93, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.49, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.32, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 139/5000 [00:31<16:53,  4.80it/s, critic_loss=1.2, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.27, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.11, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.34, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=0.943, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.35, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.32, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.53, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.48, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 140/5000 [00:31<17:07,  4.73it/s, critic_loss=1.52, policy_loss=-4.67]

cri
pol


Epoch 1/10:   3%|▎         | 141/5000 [00:31<17:07,  4.73it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:31<17:07,  4.73it/s, critic_loss=1.59, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.02, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.35, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.11, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.44, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=0.927, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.37, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.28, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.43, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 141/5000 [00:32<17:07,  4.73it/s, critic_loss=1.49, policy_loss=-4.62]

cri
pol


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=0.933, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.19, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.36, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.11, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.41, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.14, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.15, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.31, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.09, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.45, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.25, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.72, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 142/5000 [00:32<18:03,  4.48it/s, critic_loss=1.09, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=0.994, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.08, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.03, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.23, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.71, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.23, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.51, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.18, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.46, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.28, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 143/5000 [00:32<19:07,  4.23it/s, critic_loss=1.15, policy_loss=-4.62]

cri
pol


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.47, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.33, policy_loss=-4.57]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.07, policy_loss=-4.56]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.26, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.69, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.32, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.23, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.34, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.05, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.8, policy_loss=-4.59] 

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.46, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.24, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.19, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.5, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.3, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 144/5000 [00:32<18:24,  4.40it/s, critic_loss=1.78, policy_loss=-4.61]

cri
pol


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.33, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.18, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.55, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.35, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.4, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.4, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:32<17:54,  4.52it/s, critic_loss=1.37, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.4, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=0.943, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=2.11, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.37, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.85, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 145/5000 [00:33<17:54,  4.52it/s, critic_loss=1.03, policy_loss=-4.73]

cri
pol


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.33, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=0.976, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.1, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=0.965, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.28, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=0.952, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.31, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.49, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.03, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.12, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.36, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.2, policy_loss=-4.61] 

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.95, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 146/5000 [00:33<17:51,  4.53it/s, critic_loss=1.09, policy_loss=-4.61]

cri
pol


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=0.935, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.13, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.22, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.11, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.23, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.2, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.89, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.07, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.04, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 147/5000 [00:33<17:22,  4.66it/s, critic_loss=1.2, policy_loss=-4.66] 

cri
pol


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=0.946, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.04, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.1, policy_loss=-4.59] 

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.43, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.37, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.46, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.24, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.44, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.11, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.22, policy_loss=-4.54]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.39, policy_loss=-4.56]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.04, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=0.901, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=1.07, policy_loss=-4.76] 

cri


Epoch 1/10:   3%|▎         | 148/5000 [00:33<17:15,  4.69it/s, critic_loss=0.889, policy_loss=-4.74]

cri
pol


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.05, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=0.976, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.01, policy_loss=-4.62] 

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.04, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.19, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=0.915, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.27, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.59, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.15, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.05, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 149/5000 [00:33<17:32,  4.61it/s, critic_loss=0.981, policy_loss=-4.68]

cri
pol


Epoch 1/10:   3%|▎         | 150/5000 [00:33<17:52,  4.52it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.59, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1, policy_loss=-4.75]  

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.16, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=0.925, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.19, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=0.983, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.07, policy_loss=-4.61] 

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 150/5000 [00:34<17:52,  4.52it/s, critic_loss=1.13, policy_loss=-4.73]

cri
pol


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.42, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=0.955, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.2, policy_loss=-4.65]  

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=0.901, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.05, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.11, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.27, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.19, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=0.985, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.4, policy_loss=-4.68]  

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.4, policy_loss=-4.72] 

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=0.941, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 151/5000 [00:34<18:34,  4.35it/s, critic_loss=1.04, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.986, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.879, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.28, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.958, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.28, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.935, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.861, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.04, policy_loss=-4.58] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.3, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.987, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.947, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 152/5000 [00:34<19:10,  4.21it/s, critic_loss=0.879, policy_loss=-4.6] 

cri
pol


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.16, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.907, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.24, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.909, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.31, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.891, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.978, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.866, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.911, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.58, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.93, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.28, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 153/5000 [00:34<18:39,  4.33it/s, critic_loss=0.993, policy_loss=-4.67]

cri
pol


Epoch 1/10:   3%|▎         | 154/5000 [00:34<18:48,  4.30it/s, critic_loss=1.18, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:34<18:48,  4.30it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:34<18:48,  4.30it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:34<18:48,  4.30it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:34<18:48,  4.30it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.12, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=0.835, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=0.959, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=0.962, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.26, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=0.998, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=0.992, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 154/5000 [00:35<18:48,  4.30it/s, critic_loss=1.37, policy_loss=-4.74]

cri
pol


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.01, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.805, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.18, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.911, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.983, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.21, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.29, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.27, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.914, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.983, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=1.21, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.672, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 155/5000 [00:35<18:23,  4.39it/s, critic_loss=0.917, policy_loss=-4.63]

cri
pol


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1, policy_loss=-4.65]    

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.996, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.07, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.98, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.1, policy_loss=-4.76] 

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.12, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.941, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.1, policy_loss=-4.75]  

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.61, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.79, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.883, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 156/5000 [00:35<18:47,  4.30it/s, critic_loss=0.959, policy_loss=-4.71]

cri
pol


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.948, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.22, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.957, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.12, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.794, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.974, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.15, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.91, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.12, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.07, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.13, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=0.96, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.67, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 157/5000 [00:35<18:22,  4.39it/s, critic_loss=1.03, policy_loss=-4.72]

cri
pol


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.16, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.35, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.52, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.3, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.17, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:35<17:40,  4.56it/s, critic_loss=1.42, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:36<17:40,  4.56it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:36<17:40,  4.56it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:36<17:40,  4.56it/s, critic_loss=1.27, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 158/5000 [00:36<17:40,  4.56it/s, critic_loss=1.14, policy_loss=-4.62]

cri
pol


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.28, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.61, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.27, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.45, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=0.983, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.24, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.05, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.32, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.25, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.21, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.36, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.38, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 159/5000 [00:36<17:44,  4.55it/s, critic_loss=1.33, policy_loss=-4.63]

cri
pol


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.77, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.3, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.37, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.44, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.23, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.38, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=0.974, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.23, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.39, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.02, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 160/5000 [00:36<17:28,  4.61it/s, critic_loss=1.26, policy_loss=-4.72]

cri
pol


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.45, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.23, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.47, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.23, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.38, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.45, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=0.971, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.83, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.6, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=0.98, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.46, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 161/5000 [00:36<17:20,  4.65it/s, critic_loss=1.15, policy_loss=-4.62]

cri
pol


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.56, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.45, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.26, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.32, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.4, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.23, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.29, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.42, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=1.46, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 162/5000 [00:36<17:11,  4.69it/s, critic_loss=0.98, policy_loss=-4.65]

cri
pol


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.11, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.42, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.09, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=0.95, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.56, policy_loss=-4.59]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:36<17:38,  4.57it/s, critic_loss=1.17, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.23, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.13, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.36, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.49, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.45, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.26, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 163/5000 [00:37<17:38,  4.57it/s, critic_loss=1.12, policy_loss=-4.67]

cri
pol


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.21, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.08, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.07, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.13, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.17, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.54, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.35, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.2, policy_loss=-4.62] 

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.57, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 164/5000 [00:37<17:21,  4.64it/s, critic_loss=1.43, policy_loss=-4.63]

cri
pol


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.4, policy_loss=-4.62] 

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.88, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.34, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=0.947, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.52, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=0.97, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.53, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.4, policy_loss=-4.71] 

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=1.25, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 165/5000 [00:37<17:46,  4.53it/s, critic_loss=0.863, policy_loss=-4.69]

cri
pol


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.27, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.1, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.6, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=0.975, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=0.976, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=0.906, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.21, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=0.948, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.04, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.47, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.2, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=0.948, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 166/5000 [00:37<17:53,  4.51it/s, critic_loss=1.26, policy_loss=-4.66] 

cri
pol


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.04, policy_loss=-4.61]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=0.928, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=0.91, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.41, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.32, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.11, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.28, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 167/5000 [00:37<17:21,  4.64it/s, critic_loss=1.21, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   3%|▎         | 168/5000 [00:37<16:54,  4.76it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:37<16:54,  4.76it/s, critic_loss=0.952, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:37<16:54,  4.76it/s, critic_loss=1.03, policy_loss=-4.61] 

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.45, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.966, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.21, policy_loss=-4.73] 

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.872, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.94, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.908, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.961, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.22, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.64, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=1.07, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 168/5000 [00:38<16:54,  4.76it/s, critic_loss=0.894, policy_loss=-4.73]

cri
pol


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.38, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.61, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=0.953, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.51, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.72, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=0.867, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.39, policy_loss=-4.68] 

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.11, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=0.856, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=0.945, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 169/5000 [00:38<16:36,  4.85it/s, critic_loss=0.989, policy_loss=-4.63]

cri
pol


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.68, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.11, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.3, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.58, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=0.994, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=0.879, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=0.856, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.23, policy_loss=-4.74]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.22, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.31, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=0.92, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 170/5000 [00:38<16:39,  4.83it/s, critic_loss=1.14, policy_loss=-4.68]

cri
pol


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=0.825, policy_loss=-4.7]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.17, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.38, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=0.962, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.21, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.19, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=0.927, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.51, policy_loss=-4.63] 

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   3%|▎         | 171/5000 [00:38<16:31,  4.87it/s, critic_loss=1.08, policy_loss=-4.76]

cri
pol


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.17, policy_loss=-4.76]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.2, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.27, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.26, policy_loss=-4.56]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=0.984, policy_loss=-4.6]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.4, policy_loss=-4.72] 

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.19, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.39, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.12, policy_loss=-4.58]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.38, policy_loss=-4.6] 

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.94, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:38<16:37,  4.84it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 172/5000 [00:39<16:37,  4.84it/s, critic_loss=1.09, policy_loss=-4.67]

cri
pol


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.7, policy_loss=-4.66] 

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=0.975, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.19, policy_loss=-4.65] 

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.36, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=0.698, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.19, policy_loss=-4.69] 

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.12, policy_loss=-4.7] 

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=0.884, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.07, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   3%|▎         | 173/5000 [00:39<17:15,  4.66it/s, critic_loss=1, policy_loss=-4.66]   

cri
pol


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.54, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.01, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.3, policy_loss=-4.7]  

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.32, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.28, policy_loss=-4.73]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=0.904, policy_loss=-4.69]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=0.98, policy_loss=-4.67] 

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=0.978, policy_loss=-4.62]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.21, policy_loss=-4.62] 

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.62, policy_loss=-4.64]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.4, policy_loss=-4.74] 

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.39, policy_loss=-4.78]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=1.51, policy_loss=-4.72]

cri


Epoch 1/10:   3%|▎         | 174/5000 [00:39<17:41,  4.54it/s, critic_loss=0.951, policy_loss=-4.7]

cri
pol


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.03, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.901, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.91, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.05, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.92, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.36, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.973, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.898, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.988, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=0.971, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.51, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▎         | 175/5000 [00:39<17:27,  4.61it/s, critic_loss=1.13, policy_loss=-4.63]

cri
pol


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.07, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.27, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.17, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.13, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.17, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.887, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.763, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.901, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.916, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.831, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.7, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.68, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.842, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.896, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 176/5000 [00:39<17:19,  4.64it/s, critic_loss=0.879, policy_loss=-4.67]

cri
pol


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.44, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.727, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.788, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.965, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.62, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.903, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.744, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.983, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=0.855, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.25, policy_loss=-4.72] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:39<16:45,  4.80it/s, critic_loss=1.17, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=0.774, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=0.992, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=1.17, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▎         | 177/5000 [00:40<16:45,  4.80it/s, critic_loss=0.759, policy_loss=-4.7]

cri
pol


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.939, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.906, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.05, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.938, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.05, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.07, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.949, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.97, policy_loss=-4.76] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.988, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.04, policy_loss=-4.74] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.878, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.902, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.04, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.821, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.963, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.808, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.709, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 178/5000 [00:40<16:36,  4.84it/s, critic_loss=0.921, policy_loss=-4.74]

cri
pol


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.817, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.976, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.772, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.832, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.921, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.896, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.85, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.06, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.933, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.06, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.763, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.2, policy_loss=-4.67]  

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.04, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.804, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.843, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.28, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 179/5000 [00:40<16:38,  4.83it/s, critic_loss=0.91, policy_loss=-4.71]

cri
pol


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.868, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.29, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.953, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.66, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.936, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.1, policy_loss=-4.72]  

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.07, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.858, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.873, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.855, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=1.17, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.904, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 180/5000 [00:40<16:43,  4.80it/s, critic_loss=0.958, policy_loss=-4.68]

cri
pol


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.24, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=0.791, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=0.88, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=0.888, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.1, policy_loss=-4.74]  

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=0.986, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=0.816, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.56, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▎         | 181/5000 [00:40<16:44,  4.80it/s, critic_loss=1.2, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.08, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=0.846, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.12, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:40<16:26,  4.88it/s, critic_loss=1.55, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=0.983, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.05, policy_loss=-4.63] 

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.47, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.48, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=0.997, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 182/5000 [00:41<16:26,  4.88it/s, critic_loss=1.11, policy_loss=-4.75] 

cri
pol


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=0.83, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.02, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.19, policy_loss=-4.54]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.3, policy_loss=-4.59] 

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.37, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.37, policy_loss=-4.8] 

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.66, policy_loss=-4.81]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.15, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.44, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.17, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.27, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.07, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.37, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 183/5000 [00:41<16:33,  4.85it/s, critic_loss=1.09, policy_loss=-4.66]

cri
pol


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.3, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.09, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.967, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.08, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.928, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.1, policy_loss=-4.71]  

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.919, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.11, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.25, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.52, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.986, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.24, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.962, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=0.968, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 184/5000 [00:41<16:08,  4.97it/s, critic_loss=1.09, policy_loss=-4.67]

cri
pol


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.8, policy_loss=-4.72] 

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=0.904, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.15, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.22, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.21, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.52, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.35, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=0.875, policy_loss=-4.8]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.15, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.27, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.32, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.06, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.08, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.48, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.6, policy_loss=-4.74] 

cri


Epoch 1/10:   4%|▎         | 185/5000 [00:41<17:11,  4.67it/s, critic_loss=1.14, policy_loss=-4.75]

cri
pol


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.06, policy_loss=-4.79]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.53, policy_loss=-4.79]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.09, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.23, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.22, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.64, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.17, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.37, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.17, policy_loss=-4.57]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.34, policy_loss=-4.57]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.23, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 186/5000 [00:41<17:33,  4.57it/s, critic_loss=1.15, policy_loss=-4.69]

cri
pol


Epoch 1/10:   4%|▎         | 187/5000 [00:41<16:58,  4.73it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:41<16:58,  4.73it/s, critic_loss=0.952, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:41<16:58,  4.73it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:41<16:58,  4.73it/s, critic_loss=1.1, policy_loss=-4.61] 

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=2, policy_loss=-4.66]  

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.23, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=0.952, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.42, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.01, policy_loss=-4.57]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.47, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.52, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.36, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.6, policy_loss=-4.75] 

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.7, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=0.913, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▎         | 187/5000 [00:42<16:58,  4.73it/s, critic_loss=1.15, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.6, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.1, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.08, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=0.951, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.09, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.18, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.41, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.38, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.29, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.36, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.46, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.2, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.39, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 188/5000 [00:42<16:35,  4.83it/s, critic_loss=1.23, policy_loss=-4.65]

cri
pol


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.34, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=0.994, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.19, policy_loss=-4.74] 

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.41, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.61, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.33, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.65, policy_loss=-4.79]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.25, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.24, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.24, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.11, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.47, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.22, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 189/5000 [00:42<16:40,  4.81it/s, critic_loss=1.71, policy_loss=-4.81]

cri
pol


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.8, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.36, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.29, policy_loss=-4.56]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.6, policy_loss=-4.63] 

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.49, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.24, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.37, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.44, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.19, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=0.932, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=0.9, policy_loss=-4.68]  

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.46, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.49, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 190/5000 [00:42<16:30,  4.86it/s, critic_loss=1.36, policy_loss=-4.61]

cri
pol


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1, policy_loss=-4.64]   

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.987, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.987, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.872, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.75, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.54, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.934, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.07, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.951, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.06, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.984, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.17, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.808, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.996, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.917, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.971, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=1.07, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 191/5000 [00:42<16:12,  4.94it/s, critic_loss=0.948, policy_loss=-4.65]

cri
pol


Epoch 1/10:   4%|▍         | 192/5000 [00:42<16:48,  4.77it/s, critic_loss=1.27, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.27, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.918, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.18, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.899, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.08, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.923, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.996, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.7, policy_loss=-4.67]  

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.979, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.47, policy_loss=-4.74] 

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.72, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 192/5000 [00:43<16:48,  4.77it/s, critic_loss=0.877, policy_loss=-4.65]

cri
pol


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.877, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.27, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.913, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.28, policy_loss=-4.72] 

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.22, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.808, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.44, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.956, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.839, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1.01, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.975, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.978, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.835, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=0.956, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 193/5000 [00:43<17:20,  4.62it/s, critic_loss=1, policy_loss=-4.72]    

cri
pol


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.968, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.895, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.827, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.909, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.887, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.06, policy_loss=-4.72] 

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.26, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=0.963, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.11, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 194/5000 [00:43<16:50,  4.75it/s, critic_loss=1.01, policy_loss=-4.72]

cri
pol


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.801, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.941, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.82, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.975, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.3, policy_loss=-4.72]  

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.929, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.871, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.907, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.14, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.977, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.25, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.34, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=0.868, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 195/5000 [00:43<16:44,  4.78it/s, critic_loss=1.15, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.893, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1.18, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.977, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.916, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.847, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.775, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.725, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.922, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.916, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=1, policy_loss=-4.64]    

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:43<16:48,  4.76it/s, critic_loss=0.916, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:44<16:48,  4.76it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:44<16:48,  4.76it/s, critic_loss=1.17, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:44<16:48,  4.76it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 196/5000 [00:44<16:48,  4.76it/s, critic_loss=0.901, policy_loss=-4.75]

cri
pol


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.11, policy_loss=-4.75] 

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.17, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.953, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.978, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.939, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.24, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.07, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.933, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.812, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=0.849, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 197/5000 [00:44<16:45,  4.78it/s, critic_loss=1.03, policy_loss=-4.66] 

cri
pol


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.882, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.795, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.985, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.977, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.16, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.921, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.986, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.01, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.944, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.82, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=1.12, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.98, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 198/5000 [00:44<16:34,  4.83it/s, critic_loss=0.918, policy_loss=-4.64]

cri
pol


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.984, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.991, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.963, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.29, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1, policy_loss=-4.71]   

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.91, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.991, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.02, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.46, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.829, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.964, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.785, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.951, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.966, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=0.909, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.31, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 199/5000 [00:44<16:40,  4.80it/s, critic_loss=1.09, policy_loss=-4.66]

cri
pol


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.936, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.841, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.842, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.946, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.5, policy_loss=-4.63]  

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.884, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.99, policy_loss=-4.78]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.95, policy_loss=-4.8] 

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.872, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.848, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.23, policy_loss=-4.61] 

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.868, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.902, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.791, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.16, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=0.845, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 200/5000 [00:44<16:35,  4.82it/s, critic_loss=1.04, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.773, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.907, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.03, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.979, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.846, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.21, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.636, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:44<16:49,  4.76it/s, critic_loss=0.752, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=0.722, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=0.908, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=0.888, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=0.968, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=1.4, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 201/5000 [00:45<16:49,  4.76it/s, critic_loss=0.874, policy_loss=-4.69]

cri
pol


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.874, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.05, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.795, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.857, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.873, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.879, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.16, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.02, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.853, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.924, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 202/5000 [00:45<16:48,  4.76it/s, critic_loss=0.83, policy_loss=-4.61] 

cri
pol


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.18, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.97, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.11, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.859, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.779, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1, policy_loss=-4.66]   

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.832, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.958, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=0.99, policy_loss=-4.76] 

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.18, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.1, policy_loss=-4.61] 

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.24, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.08, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 203/5000 [00:45<16:49,  4.75it/s, critic_loss=1.31, policy_loss=-4.75]

cri
pol


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.15, policy_loss=-4.79]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.36, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=0.874, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.07, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.07, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.41, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=0.858, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.31, policy_loss=-4.75] 

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.39, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.61, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.33, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=0.951, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.1, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 204/5000 [00:45<16:31,  4.84it/s, critic_loss=1.11, policy_loss=-4.65]

cri
pol


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.982, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.95, policy_loss=-4.62] 

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.19, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.914, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.06, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.26, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.78, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.937, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.928, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=0.907, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.28, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 205/5000 [00:45<16:13,  4.93it/s, critic_loss=1.08, policy_loss=-4.68]

cri
pol


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=0.976, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=1.11, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=1.34, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=0.988, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:45<16:59,  4.70it/s, critic_loss=0.877, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.06, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.2, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.32, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=0.948, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=0.964, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.18, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.45, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.66, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.23, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.23, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=0.819, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 206/5000 [00:46<16:59,  4.70it/s, critic_loss=1.51, policy_loss=-4.67]

cri
pol


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=0.963, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=0.999, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.01, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=0.978, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.24, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.26, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.12, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.78, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 207/5000 [00:46<16:37,  4.80it/s, critic_loss=1.04, policy_loss=-4.71]

cri
pol


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.45, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.45, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.12, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=0.999, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.28, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.02, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=0.87, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 208/5000 [00:46<16:41,  4.78it/s, critic_loss=1.18, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.36, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.29, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.1, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=0.893, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.6, policy_loss=-4.67]  

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.2, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.33, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.75, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.33, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.46, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.7, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.1, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.09, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.57, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 209/5000 [00:46<16:05,  4.96it/s, critic_loss=1.23, policy_loss=-4.71]

cri
pol


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.08, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.85, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.4, policy_loss=-4.64] 

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.5, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.82, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.33, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.47, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.25, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.64, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.14, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.24, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 210/5000 [00:46<15:58,  4.99it/s, critic_loss=1.28, policy_loss=-4.66]

cri
pol


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.45, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.5, policy_loss=-4.58] 

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.72, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.47, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:46<16:12,  4.92it/s, critic_loss=1.39, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.76, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.81, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.28, policy_loss=-4.56]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.45, policy_loss=-4.55]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.87, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.35, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.37, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.44, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.2, policy_loss=-4.78] 

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.41, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.75, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.39, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.2, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▍         | 211/5000 [00:47<16:12,  4.92it/s, critic_loss=1.3, policy_loss=-4.6] 

cri
pol


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.39, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.46, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.19, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.14, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.56, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.82, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.54, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.32, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.18, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.72, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.68, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=2.06, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.22, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 212/5000 [00:47<16:19,  4.89it/s, critic_loss=1.43, policy_loss=-4.65]

cri
pol


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.33, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.1, policy_loss=-4.6]  

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.41, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.39, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.31, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.65, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.23, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.77, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.21, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.51, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.43, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.41, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.14, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.23, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 213/5000 [00:47<17:20,  4.60it/s, critic_loss=1.74, policy_loss=-4.6] 

cri
pol


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=2.06, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.58, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.31, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.27, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.33, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.41, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.39, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.41, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.68, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.28, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.26, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.39, policy_loss=-4.59]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.26, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=1.34, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 214/5000 [00:47<16:43,  4.77it/s, critic_loss=0.997, policy_loss=-4.63]

cri
pol


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.43, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.39, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.79, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.3, policy_loss=-4.62] 

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.09, policy_loss=-4.6]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.51, policy_loss=-4.57]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.42, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.89, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.31, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.18, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.32, policy_loss=-4.77]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.52, policy_loss=-4.76]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.93, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.38, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.45, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 215/5000 [00:47<16:52,  4.72it/s, critic_loss=1.24, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.4, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.52, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=0.919, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.29, policy_loss=-4.61] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=0.965, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.17, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.61, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=0.904, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.32, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.13, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.37, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=0.991, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.22, policy_loss=-4.61] 

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 216/5000 [00:48<16:43,  4.77it/s, critic_loss=1.21, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.38, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.53, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.62, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.09, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.16, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.23, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.43, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 217/5000 [00:48<16:24,  4.86it/s, critic_loss=1.29, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.41, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.23, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.15, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.21, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.1, policy_loss=-4.62] 

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.16, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=0.985, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.03, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.27, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 218/5000 [00:48<16:03,  4.96it/s, critic_loss=1.3, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.19, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.32, policy_loss=-4.57]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.05, policy_loss=-4.55]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=0.987, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.22, policy_loss=-4.62] 

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=0.92, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.3, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.32, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.16, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=0.997, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=0.991, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.07, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=0.873, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 219/5000 [00:48<16:08,  4.94it/s, critic_loss=1.36, policy_loss=-4.61] 

cri
pol


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.11, policy_loss=-4.61]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.09, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1, policy_loss=-4.64]   

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=0.996, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.19, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=0.987, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=0.968, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.45, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.21, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.07, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.02, policy_loss=-4.75]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.77, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.09, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.11, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=0.95, policy_loss=-4.58]

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1.05, policy_loss=-4.6] 

cri


Epoch 1/10:   4%|▍         | 220/5000 [00:48<16:04,  4.96it/s, critic_loss=1, policy_loss=-4.62]  

cri
pol


Epoch 1/10:   4%|▍         | 221/5000 [00:48<15:54,  5.01it/s, critic_loss=0.928, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.816, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.913, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.984, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.975, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.847, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=1.37, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.918, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.881, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.836, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.884, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.975, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=1.25, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.867, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.966, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.76, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 221/5000 [00:49<15:54,  5.01it/s, critic_loss=0.876, policy_loss=-4.67]

cri
pol


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.928, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.82, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.873, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.971, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.779, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.804, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.888, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.931, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.911, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=1.08, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.98, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.897, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.98, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.81, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=1.29, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.86, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.891, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=0.883, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 222/5000 [00:49<15:59,  4.98it/s, critic_loss=1.51, policy_loss=-4.6]  

cri
pol


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.09, policy_loss=-4.62]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.44, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.965, policy_loss=-4.73]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.17, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.838, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.896, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.975, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.904, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.12, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.04, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.813, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.894, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.1, policy_loss=-4.65]  

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=0.934, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 223/5000 [00:49<16:06,  4.94it/s, critic_loss=1.05, policy_loss=-4.72]

cri
pol


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.839, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.939, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.81, policy_loss=-4.68] 

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.927, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.15, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.786, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.891, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.861, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.43, policy_loss=-4.69] 

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.892, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=1.59, policy_loss=-4.73] 

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.865, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.856, policy_loss=-4.65]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.903, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.741, policy_loss=-4.63]

cri


Epoch 1/10:   4%|▍         | 224/5000 [00:49<15:55,  5.00it/s, critic_loss=0.969, policy_loss=-4.64]

cri
pol


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.31, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.982, policy_loss=-4.7]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.37, policy_loss=-4.72]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.966, policy_loss=-4.71]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.954, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.06, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.894, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.871, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.02, policy_loss=-4.65] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.911, policy_loss=-4.67]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.19, policy_loss=-4.66] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.959, policy_loss=-4.69]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.03, policy_loss=-4.67] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=0.95, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.48, policy_loss=-4.68]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:49<16:28,  4.83it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   4%|▍         | 225/5000 [00:50<16:28,  4.83it/s, critic_loss=1.29, policy_loss=-4.72]

cri
pol


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=0.905, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.21, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=0.99, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.02, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.18, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.17, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=0.779, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.25, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.09, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.14, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 226/5000 [00:50<16:28,  4.83it/s, critic_loss=0.99, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.25, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.21, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=0.995, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.25, policy_loss=-4.76] 

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.38, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=0.829, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=0.798, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.1, policy_loss=-4.69]  

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=1.7, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▍         | 227/5000 [00:50<16:25,  4.84it/s, critic_loss=0.962, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.29, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.05, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.07, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.27, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.12, policy_loss=-4.77]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=0.943, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.12, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.48, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=0.846, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.31, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1, policy_loss=-4.75]  

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=0.822, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.37, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 228/5000 [00:50<16:13,  4.90it/s, critic_loss=1.21, policy_loss=-4.69]

cri
pol


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.7, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=0.925, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.11, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.07, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.27, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.32, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.13, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=0.935, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.27, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 229/5000 [00:50<16:14,  4.90it/s, critic_loss=1.22, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=0.925, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.19, policy_loss=-4.61] 

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.25, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.19, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.33, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.05, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.74, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.56, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.13, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.33, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.57, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=0.895, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=0.992, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.1, policy_loss=-4.72]  

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=1.23, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:50<15:58,  4.98it/s, critic_loss=0.846, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:51<15:58,  4.98it/s, critic_loss=0.979, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 230/5000 [00:51<15:58,  4.98it/s, critic_loss=1.16, policy_loss=-4.64] 

cri
pol


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.991, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.969, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.992, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.935, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.961, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.28, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.897, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.38, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.971, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.932, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.96, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=0.835, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.22, policy_loss=-4.61] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 231/5000 [00:51<16:02,  4.95it/s, critic_loss=1.4, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.09, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.944, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.49, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.965, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.16, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.07, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.04, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.1, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.99, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.96, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.41, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.977, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 232/5000 [00:51<15:46,  5.04it/s, critic_loss=0.864, policy_loss=-4.69]

cri
pol


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.23, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=0.998, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.15, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.38, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.64, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1, policy_loss=-4.71]   

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=0.962, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.35, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.2, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 233/5000 [00:51<16:07,  4.93it/s, critic_loss=1.15, policy_loss=-4.66]

cri
pol


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.14, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=0.925, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.7, policy_loss=-4.72]  

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.1, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.23, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.12, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=0.981, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=0.947, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.09, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.05, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.33, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.2, policy_loss=-4.74] 

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.22, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=0.987, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 234/5000 [00:51<16:00,  4.96it/s, critic_loss=1.15, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.73, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.32, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.23, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.6, policy_loss=-4.62] 

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.1, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.2, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.05, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.24, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.11, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:51<15:57,  4.97it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:52<15:57,  4.97it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:52<15:57,  4.97it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:52<15:57,  4.97it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 235/5000 [00:52<15:57,  4.97it/s, critic_loss=0.947, policy_loss=-4.72]

cri
pol


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.11, policy_loss=-4.76] 

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.58, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.64, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.57, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.38, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.38, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.23, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.66, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.51, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.31, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.6, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▍         | 236/5000 [00:52<16:49,  4.72it/s, critic_loss=1.46, policy_loss=-4.72]

cri
pol


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.43, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.32, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.21, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.62, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.32, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.4, policy_loss=-4.59] 

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.44, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 237/5000 [00:52<17:35,  4.51it/s, critic_loss=1.15, policy_loss=-4.63]

cri
pol


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.72, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.83, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.12, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.2, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.35, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.13, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.2, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.73, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.38, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.49, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.44, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.34, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.18, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.13, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.58, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 238/5000 [00:52<17:14,  4.60it/s, critic_loss=1.24, policy_loss=-4.69]

cri
pol


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.24, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.22, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.56, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.38, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.23, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.34, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.36, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.28, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.28, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.45, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 239/5000 [00:52<17:09,  4.62it/s, critic_loss=1.5, policy_loss=-4.62] 

cri
pol


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.41, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.42, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.43, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.63, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:52<16:51,  4.70it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.15, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.26, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.38, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.55, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.69, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.44, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.19, policy_loss=-4.58]

cri


Epoch 1/10:   5%|▍         | 240/5000 [00:53<16:51,  4.70it/s, critic_loss=1.42, policy_loss=-4.59]

cri
pol


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.48, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.64, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.24, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.4, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.32, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.11, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.57, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.71, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.44, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.43, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.48, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.62, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.23, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 241/5000 [00:53<16:50,  4.71it/s, critic_loss=1.19, policy_loss=-4.65]

cri
pol


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.68, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.17, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=0.988, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.3, policy_loss=-4.69]  

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.66, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.53, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.54, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.95, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.52, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.66, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.37, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.37, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.45, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 242/5000 [00:53<16:53,  4.70it/s, critic_loss=1.19, policy_loss=-4.66]

cri
pol


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.39, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.48, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.44, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.47, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.61, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.43, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.3, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.51, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.78, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.78, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 243/5000 [00:53<16:24,  4.83it/s, critic_loss=1.31, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.58, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.49, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.7, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.51, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.48, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.33, policy_loss=-4.58]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.27, policy_loss=-4.58]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.64, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.35, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.24, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.58, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.37, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.51, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.31, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.45, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 244/5000 [00:53<17:05,  4.64it/s, critic_loss=1.06, policy_loss=-4.65]

cri
pol


Epoch 1/10:   5%|▍         | 245/5000 [00:53<16:41,  4.75it/s, critic_loss=1.45, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.33, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.35, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.31, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.1, policy_loss=-4.62] 

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.38, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.37, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.69, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.28, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▍         | 245/5000 [00:54<16:41,  4.75it/s, critic_loss=1.2, policy_loss=-4.7]

cri
pol


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=0.993, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=0.838, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.19, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.49, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.3, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.16, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.55, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▍         | 246/5000 [00:54<17:00,  4.66it/s, critic_loss=0.996, policy_loss=-4.64]

cri
pol


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.09, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=0.989, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=0.966, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.01, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=0.979, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.33, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.62, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.36, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.24, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=0.991, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=0.984, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.12, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 247/5000 [00:54<16:45,  4.73it/s, critic_loss=1.1, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.799, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.818, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.32, policy_loss=-4.62] 

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.27, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.42, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.976, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.766, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.09, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.978, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.16, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=0.998, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.2, policy_loss=-4.63]  

cri


Epoch 1/10:   5%|▍         | 248/5000 [00:54<16:24,  4.83it/s, critic_loss=1.26, policy_loss=-4.64]

cri
pol


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=0.731, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.26, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.12, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.21, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=0.769, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=0.918, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.03, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:54<17:13,  4.59it/s, critic_loss=1.01, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:55<17:13,  4.59it/s, critic_loss=0.93, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:55<17:13,  4.59it/s, critic_loss=0.952, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:55<17:13,  4.59it/s, critic_loss=1.12, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:55<17:13,  4.59it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▍         | 249/5000 [00:55<17:13,  4.59it/s, critic_loss=1.27, policy_loss=-4.73]

cri
pol


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.905, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.51, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.39, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.863, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.938, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.875, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.896, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.03, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.985, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.901, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.991, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.859, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=0.948, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1, policy_loss=-4.65]    

cri


Epoch 1/10:   5%|▌         | 250/5000 [00:55<16:52,  4.69it/s, critic_loss=1.12, policy_loss=-4.67]

cri
pol


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.27, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.43, policy_loss=-4.77]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.975, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.09, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.15, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.962, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.38, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.2, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.986, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.828, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.926, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=1.13, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 251/5000 [00:55<16:43,  4.73it/s, critic_loss=0.783, policy_loss=-4.72]

cri
pol


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.911, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.05, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.937, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.892, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.86, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.09, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.01, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.79, policy_loss=-4.77]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.12, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.812, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.948, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.33, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=0.947, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1.36, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 252/5000 [00:55<16:27,  4.81it/s, critic_loss=1, policy_loss=-4.78]   

cri
pol


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.937, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.996, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.32, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.961, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.899, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.02, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.934, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.878, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.03, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.825, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1, policy_loss=-4.73]    

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.948, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.988, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.968, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.976, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.31, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 253/5000 [00:55<16:11,  4.88it/s, critic_loss=0.807, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.01, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=0.744, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=0.872, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.03, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=0.888, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=0.915, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:55<16:29,  4.80it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=1.57, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=0.936, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=1.09, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=0.984, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=0.907, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 254/5000 [00:56<16:29,  4.80it/s, critic_loss=0.762, policy_loss=-4.69]

cri
pol


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.11, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.11, policy_loss=-4.59]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.902, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.16, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.15, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.908, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.2, policy_loss=-4.64]  

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.835, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.12, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.961, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.916, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.952, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.991, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.968, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=0.979, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 255/5000 [00:56<16:19,  4.84it/s, critic_loss=1.03, policy_loss=-4.63] 

cri
pol


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.832, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.769, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.06, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.872, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.29, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.875, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.997, policy_loss=-4.58]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.22, policy_loss=-4.59] 

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=2.45, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.31, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.88, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.967, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.847, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=1.72, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 256/5000 [00:56<16:36,  4.76it/s, critic_loss=0.99, policy_loss=-4.65]

cri
pol


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.837, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.824, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.28, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.968, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.939, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.999, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.15, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.986, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.961, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.16, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.903, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=0.918, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 257/5000 [00:56<17:11,  4.60it/s, critic_loss=1.14, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.47, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.23, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.29, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.2, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=0.943, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.21, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.37, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.31, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.58, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=0.899, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.15, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.38, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.25, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 258/5000 [00:56<16:36,  4.76it/s, critic_loss=1.25, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=1.19, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=1.87, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=1.3, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=0.945, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=1.46, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:56<16:37,  4.75it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.21, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.39, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.13, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.33, policy_loss=-4.57]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=0.878, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.3, policy_loss=-4.73]  

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=0.858, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 259/5000 [00:57<16:37,  4.75it/s, critic_loss=1.01, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.51, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=0.994, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.23, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.16, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.17, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=0.891, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.69, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 260/5000 [00:57<16:46,  4.71it/s, critic_loss=1.14, policy_loss=-4.64]

cri
pol


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.41, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.36, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=0.9, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.1, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.51, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=0.923, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.18, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=0.998, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.2, policy_loss=-4.68]  

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.47, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=0.863, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 261/5000 [00:57<17:06,  4.62it/s, critic_loss=1.08, policy_loss=-4.66] 

cri
pol


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.45, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=0.972, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.2, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.61, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=0.892, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.11, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.23, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.02, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.02, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.55, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=0.989, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 262/5000 [00:57<16:36,  4.75it/s, critic_loss=1.31, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=0.899, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.1, policy_loss=-4.64]  

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.28, policy_loss=-4.6]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=0.933, policy_loss=-4.6]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=0.923, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.43, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.39, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.16, policy_loss=-4.79]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.16, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.44, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.21, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 263/5000 [00:57<16:33,  4.77it/s, critic_loss=1.17, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▌         | 264/5000 [00:57<16:06,  4.90it/s, critic_loss=0.916, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:57<16:06,  4.90it/s, critic_loss=1.27, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.09, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.33, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=0.971, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=0.911, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.28, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.14, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.22, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.92, policy_loss=-4.78]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=0.982, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.01, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.1, policy_loss=-4.61] 

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.48, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 264/5000 [00:58<16:06,  4.90it/s, critic_loss=1.22, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.32, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.25, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.47, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.4, policy_loss=-4.59] 

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.47, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.06, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.16, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.29, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.18, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=0.908, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 265/5000 [00:58<15:44,  5.01it/s, critic_loss=1.17, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=0.94, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=0.894, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.15, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.01, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.38, policy_loss=-4.77]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.22, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1, policy_loss=-4.66]   

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.22, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.13, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=0.926, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.17, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.29, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 266/5000 [00:58<15:41,  5.03it/s, critic_loss=1.13, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.56, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.59, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=0.991, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=0.85, policy_loss=-4.66] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=0.908, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=0.954, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=0.994, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.11, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 267/5000 [00:58<15:32,  5.07it/s, critic_loss=1.17, policy_loss=-4.7]

cri
pol


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.51, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.01, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.893, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.904, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.744, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.01, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.11, policy_loss=-4.59]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.949, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.17, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.887, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.3, policy_loss=-4.73]  

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=0.957, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.04, policy_loss=-4.62] 

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 268/5000 [00:58<15:36,  5.05it/s, critic_loss=1.21, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▌         | 269/5000 [00:58<16:12,  4.87it/s, critic_loss=1.33, policy_loss=-4.76]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:58<16:12,  4.87it/s, critic_loss=1.09, policy_loss=-4.78]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.08, policy_loss=-4.75]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.848, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.33, policy_loss=-4.73] 

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.959, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.813, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.969, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.843, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.917, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.866, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.39, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.929, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.888, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=0.958, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.11, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 269/5000 [00:59<16:12,  4.87it/s, critic_loss=1.06, policy_loss=-4.68]

cri
pol


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.17, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.977, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.853, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.977, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.947, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.05, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.748, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.15, policy_loss=-4.73]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=1.08, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.763, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.918, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.846, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.913, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 270/5000 [00:59<15:54,  4.96it/s, critic_loss=0.801, policy_loss=-4.63]

cri
pol


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.962, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.848, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.968, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.94, policy_loss=-4.64] 

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.651, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.776, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.949, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.963, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.931, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.971, policy_loss=-4.65]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.818, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   5%|▌         | 271/5000 [00:59<15:47,  4.99it/s, critic_loss=0.664, policy_loss=-4.7]

cri
pol


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.32, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.868, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.05, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.926, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.929, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.01, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.809, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.919, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.19, policy_loss=-4.65] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.929, policy_loss=-4.63]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.11, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.975, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.714, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.17, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=1.48, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.902, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 272/5000 [00:59<15:59,  4.93it/s, critic_loss=0.758, policy_loss=-4.71]

cri
pol


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.825, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.758, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.941, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.923, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.959, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.809, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=1.19, policy_loss=-4.74] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.906, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.87, policy_loss=-4.75] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=1.23, policy_loss=-4.74]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.991, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.932, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.962, policy_loss=-4.61]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.859, policy_loss=-4.6] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.918, policy_loss=-4.64]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.974, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 273/5000 [00:59<15:51,  4.97it/s, critic_loss=0.934, policy_loss=-4.75]

cri
pol


Epoch 1/10:   5%|▌         | 274/5000 [00:59<15:53,  4.96it/s, critic_loss=0.754, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 274/5000 [00:59<15:53,  4.96it/s, critic_loss=0.864, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.25, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.876, policy_loss=-4.62]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.03, policy_loss=-4.63] 

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.721, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.909, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.843, policy_loss=-4.72]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.748, policy_loss=-4.71]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.01, policy_loss=-4.72] 

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.4, policy_loss=-4.71] 

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.871, policy_loss=-4.7]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.873, policy_loss=-4.67]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.864, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.828, policy_loss=-4.68]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=1.28, policy_loss=-4.67] 

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.877, policy_loss=-4.66]

cri


Epoch 1/10:   5%|▌         | 274/5000 [01:00<15:53,  4.96it/s, critic_loss=0.896, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.778, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.07, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.879, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.969, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.22, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.02, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.795, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.937, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.782, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.909, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.741, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.24, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.976, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.853, policy_loss=-4.76]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.01, policy_loss=-4.76] 

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.09, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.84, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=0.898, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 275/5000 [01:00<15:53,  4.95it/s, critic_loss=1.41, policy_loss=-4.64] 

cri
pol


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.843, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.833, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.867, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.681, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.779, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.969, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.848, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.672, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.916, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.739, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.819, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=1.07, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.834, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.77, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.822, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=1.57, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 276/5000 [01:00<16:02,  4.91it/s, critic_loss=0.694, policy_loss=-4.73]

cri
pol


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.675, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.816, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.709, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.79, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.86, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.922, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.928, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.945, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.875, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.756, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=1.06, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.904, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.911, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=1.34, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.891, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.823, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.936, policy_loss=-4.76]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=1.11, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=0.77, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 277/5000 [01:00<15:55,  4.94it/s, critic_loss=1.01, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.729, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=1.17, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.758, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.857, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.636, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.78, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.669, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.999, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.885, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.96, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.633, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.956, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=1.17, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.804, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=1.1, policy_loss=-4.73]  

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.993, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.606, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.779, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 278/5000 [01:00<16:16,  4.84it/s, critic_loss=0.768, policy_loss=-4.63]

cri
pol


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1, policy_loss=-4.64]    

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.661, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.78, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.727, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.518, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.885, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1.34, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.829, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.791, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.882, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1.1, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.701, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.917, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.752, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.999, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.667, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 279/5000 [01:01<16:05,  4.89it/s, critic_loss=0.653, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=1.2, policy_loss=-4.7]   

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.739, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.774, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.957, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.764, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.703, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.657, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.865, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.729, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.706, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.797, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.968, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.795, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=1.04, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.812, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.842, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.789, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 280/5000 [01:01<16:04,  4.89it/s, critic_loss=0.994, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.888, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.903, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.941, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.822, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=1.1, policy_loss=-4.75]  

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.838, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.749, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.707, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.768, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.757, policy_loss=-4.59]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.687, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.745, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=1.03, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.821, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.686, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.737, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.691, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=1.15, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 281/5000 [01:01<16:03,  4.90it/s, critic_loss=0.822, policy_loss=-4.68]

cri
pol


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.836, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.783, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.944, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=1.01, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.888, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.835, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.929, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.746, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.819, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.91, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.943, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.83, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.72, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=1.07, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.672, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.932, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.706, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=0.989, policy_loss=-4.78]

cri


Epoch 1/10:   6%|▌         | 282/5000 [01:01<15:53,  4.95it/s, critic_loss=1.1, policy_loss=-4.75]  

cri
pol


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.881, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.2, policy_loss=-4.72]  

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.764, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.823, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.792, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.32, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.03, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.77, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.913, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.879, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.15, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.97, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:01<16:18,  4.82it/s, critic_loss=0.843, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:02<16:18,  4.82it/s, critic_loss=0.837, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:02<16:18,  4.82it/s, critic_loss=0.742, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 283/5000 [01:02<16:18,  4.82it/s, critic_loss=0.935, policy_loss=-4.73]

cri
pol


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.934, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=1.3, policy_loss=-4.71]  

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.792, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.992, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.973, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.864, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.813, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=1.09, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.843, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.969, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.832, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.804, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.732, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.881, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.677, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.888, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.8, policy_loss=-4.74]  

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 284/5000 [01:02<16:39,  4.72it/s, critic_loss=0.937, policy_loss=-4.72]

cri
pol


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.816, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.865, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.84, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.23, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1, policy_loss=-4.64]   

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.905, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.02, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.03, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.903, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.884, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.01, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.974, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.33, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.801, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 285/5000 [01:02<16:36,  4.73it/s, critic_loss=0.926, policy_loss=-4.64]

cri
pol


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.16, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.68, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=0.901, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.13, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.16, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=0.87, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=0.934, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.12, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.32, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=0.846, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=0.819, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.18, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 286/5000 [01:02<16:20,  4.81it/s, critic_loss=1.11, policy_loss=-4.69]

cri
pol


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.11, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.17, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=0.887, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=0.932, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.19, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.2, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.14, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.27, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.52, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.56, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=0.971, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.19, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.12, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.29, policy_loss=-4.59]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.31, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 287/5000 [01:02<16:32,  4.75it/s, critic_loss=1.32, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.46, policy_loss=-4.76]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=0.969, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.19, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.5, policy_loss=-4.6]  

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.06, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:02<16:33,  4.74it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=0.829, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.35, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.16, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.46, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 288/5000 [01:03<16:33,  4.74it/s, critic_loss=1.47, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.26, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.24, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.03, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.08, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.4, policy_loss=-4.6]  

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.36, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1, policy_loss=-4.77]  

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.4, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.38, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.2, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.19, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 289/5000 [01:03<16:20,  4.80it/s, critic_loss=0.833, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.61, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.41, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.11, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.3, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.3, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.59, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.55, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=0.781, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 290/5000 [01:03<15:57,  4.92it/s, critic_loss=1.26, policy_loss=-4.63] 

cri
pol


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.67, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.45, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.13, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.2, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.21, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.49, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1, policy_loss=-4.66]   

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=0.995, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.21, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.64, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.36, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.46, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.71, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 291/5000 [01:03<16:01,  4.90it/s, critic_loss=1.14, policy_loss=-4.65]

cri
pol


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.71, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.47, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.27, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.17, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.44, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.47, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.22, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.62, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.25, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.22, policy_loss=-4.58]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.25, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.1, policy_loss=-4.6]  

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.48, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 292/5000 [01:03<16:08,  4.86it/s, critic_loss=1.17, policy_loss=-4.64]

cri
pol


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.49, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.39, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.02, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.3, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1, policy_loss=-4.66]  

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.39, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.15, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=0.985, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:03<15:43,  4.99it/s, critic_loss=1.36, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.24, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.89, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.48, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.33, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.45, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 293/5000 [01:04<15:43,  4.99it/s, critic_loss=1.29, policy_loss=-4.63]

cri
pol


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=0.938, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.23, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.57, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.15, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.03, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.34, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.31, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.36, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.65, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.26, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.15, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.27, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.38, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 294/5000 [01:04<15:59,  4.91it/s, critic_loss=1.51, policy_loss=-4.59]

cri
pol


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.28, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.19, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.23, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=0.772, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.03, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.75, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=0.84, policy_loss=-4.6]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.41, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.49, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.29, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=0.976, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.65, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 295/5000 [01:04<15:58,  4.91it/s, critic_loss=1.25, policy_loss=-4.68]

cri
pol


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.24, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=0.746, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.01, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.11, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.16, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=0.928, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=0.904, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=0.946, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=0.889, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.08, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 296/5000 [01:04<16:04,  4.88it/s, critic_loss=1.16, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.96, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.893, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.84, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.48, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.951, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.11, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.906, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.02, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.25, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.32, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=1.11, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.934, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 297/5000 [01:04<16:05,  4.87it/s, critic_loss=0.927, policy_loss=-4.77]

cri
pol


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=0.845, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=1.47, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=0.984, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=0.741, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:04<15:49,  4.95it/s, critic_loss=0.799, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=0.92, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=0.954, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.33, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.27, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1, policy_loss=-4.75]   

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=0.707, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 298/5000 [01:05<15:49,  4.95it/s, critic_loss=1.01, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.802, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.968, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.912, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.785, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.732, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.93, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.927, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.823, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.818, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.973, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1.25, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.858, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.865, policy_loss=-4.79]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.974, policy_loss=-4.76]

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1.48, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=1, policy_loss=-4.67]   

cri


Epoch 1/10:   6%|▌         | 299/5000 [01:05<15:45,  4.97it/s, critic_loss=0.673, policy_loss=-4.64]

cri
pol


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1, policy_loss=-4.61]    

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.972, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.22, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.791, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.956, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.967, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.964, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.15, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.791, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.894, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.904, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.979, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.02, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.1, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=1.73, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.979, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.963, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 300/5000 [01:05<15:39,  5.00it/s, critic_loss=0.864, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.07, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.895, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.984, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.972, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.953, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.951, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.941, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.07, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.29, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.924, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.937, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.969, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=0.783, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.21, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 301/5000 [01:05<15:32,  5.04it/s, critic_loss=1.39, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.06, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=0.946, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.2, policy_loss=-4.73]  

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.46, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=0.931, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=0.872, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=0.901, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.33, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=0.995, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.1, policy_loss=-4.64]  

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.38, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 302/5000 [01:05<15:24,  5.08it/s, critic_loss=1.31, policy_loss=-4.75]

cri
pol


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.19, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.24, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=0.946, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.28, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:05<15:20,  5.10it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.949, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.861, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.888, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.898, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.985, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 303/5000 [01:06<15:20,  5.10it/s, critic_loss=0.897, policy_loss=-4.69]

cri
pol


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.15, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.41, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=0.842, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.35, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=0.944, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.32, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.53, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.28, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=0.98, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.29, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.14, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 304/5000 [01:06<15:24,  5.08it/s, critic_loss=1.22, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.2, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.1, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.21, policy_loss=-4.59]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=0.967, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.18, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=0.889, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.2, policy_loss=-4.71]  

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.08, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1, policy_loss=-4.65]   

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=0.899, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.42, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=0.899, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 305/5000 [01:06<15:24,  5.08it/s, critic_loss=1.1, policy_loss=-4.72]  

cri
pol


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.3, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=0.979, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=0.938, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.11, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.5, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=0.925, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=0.994, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=0.869, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.33, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 306/5000 [01:06<15:26,  5.07it/s, critic_loss=1.27, policy_loss=-4.65]

cri
pol


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.13, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.27, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.34, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=0.991, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.03, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=0.955, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.06, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.27, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=0.87, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.48, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.07, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=0.998, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.03, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=0.958, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 307/5000 [01:06<15:12,  5.14it/s, critic_loss=1.53, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=0.952, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.41, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.17, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.21, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=0.871, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.33, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:06<15:49,  4.94it/s, critic_loss=1.25, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=1.02, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=0.968, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=0.89, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=0.906, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=1.16, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=1.02, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=0.97, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 308/5000 [01:07<15:49,  4.94it/s, critic_loss=1.03, policy_loss=-4.68]

cri
pol


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.916, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.916, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.844, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.14, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.973, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.75, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.898, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.19, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.979, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.899, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.896, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.853, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.916, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.11, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 309/5000 [01:07<15:59,  4.89it/s, critic_loss=0.805, policy_loss=-4.65]

cri
pol


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.816, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.663, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.927, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.12, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.862, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.74, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.39, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.793, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.703, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.863, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=0.989, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.01, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 310/5000 [01:07<15:34,  5.02it/s, critic_loss=1.05, policy_loss=-4.74]

cri
pol


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.82, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.11, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.838, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.967, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.85, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.04, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.855, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.812, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.944, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.936, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.774, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.22, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.907, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.09, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.999, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=1.09, policy_loss=-4.76] 

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.799, policy_loss=-4.77]

cri


Epoch 1/10:   6%|▌         | 311/5000 [01:07<16:30,  4.74it/s, critic_loss=0.868, policy_loss=-4.74]

cri
pol


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.757, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1.31, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.759, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.861, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.69, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.86, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.744, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.807, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.761, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.753, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.896, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1, policy_loss=-4.68]    

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.901, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.825, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1.19, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.856, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1.07, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=0.775, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▌         | 312/5000 [01:07<16:14,  4.81it/s, critic_loss=1.15, policy_loss=-4.66] 

cri
pol


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=0.88, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=0.822, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=0.893, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=1.14, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=0.92, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:07<16:20,  4.78it/s, critic_loss=0.776, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=1.1, policy_loss=-4.71]  

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.685, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.957, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.779, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=1.09, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.681, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=1.03, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.672, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.859, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.837, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 313/5000 [01:08<16:20,  4.78it/s, critic_loss=0.788, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.755, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.824, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.21, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.829, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.986, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.701, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.691, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.812, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.974, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.778, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.757, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.74, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.803, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.05, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▋         | 314/5000 [01:08<15:57,  4.90it/s, critic_loss=0.96, policy_loss=-4.73]

cri
pol


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.906, policy_loss=-4.76]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.712, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=1.3, policy_loss=-4.7]   

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.817, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.805, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.743, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.774, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.705, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.879, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.95, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.701, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=1, policy_loss=-4.71]    

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.962, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.798, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.816, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.872, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=1.21, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=0.961, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 315/5000 [01:08<16:02,  4.87it/s, critic_loss=1.58, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.743, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.882, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.07, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.941, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.08, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.867, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.898, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.755, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.917, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.802, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.902, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.698, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.93, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.16, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=0.829, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.44, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 316/5000 [01:08<17:21,  4.50it/s, critic_loss=1.15, policy_loss=-4.66]

cri
pol


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.998, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.817, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.899, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.809, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.895, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.786, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.771, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.852, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.09, policy_loss=-4.61] 

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.54, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.676, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.22, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.948, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.775, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.21, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.964, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=1.06, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▋         | 317/5000 [01:08<16:34,  4.71it/s, critic_loss=0.879, policy_loss=-4.64]

cri
pol


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.86, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.888, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.818, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.962, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.933, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.983, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.87, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.775, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.808, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.856, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.797, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.733, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.976, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.942, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.924, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.898, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 318/5000 [01:09<16:43,  4.66it/s, critic_loss=0.897, policy_loss=-4.69]

cri
pol


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.12, policy_loss=-4.66] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.998, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.913, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.922, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.02, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.939, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.57, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.907, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.881, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.04, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.4, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.831, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.878, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=1.09, policy_loss=-4.72] 

cri


Epoch 1/10:   6%|▋         | 319/5000 [01:09<16:30,  4.73it/s, critic_loss=0.922, policy_loss=-4.73]

cri
pol


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.976, policy_loss=-4.74]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.95, policy_loss=-4.7]  

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.1, policy_loss=-4.62] 

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.878, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.926, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.875, policy_loss=-4.7]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.798, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.18, policy_loss=-4.65] 

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.08, policy_loss=-4.59]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.955, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.17, policy_loss=-4.63] 

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.52, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.888, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.08, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=1.02, policy_loss=-4.75]

cri


Epoch 1/10:   6%|▋         | 320/5000 [01:09<16:14,  4.80it/s, critic_loss=0.946, policy_loss=-4.67]

cri
pol


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.03, policy_loss=-4.6]  

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.42, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.03, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.956, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.956, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.38, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.998, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.28, policy_loss=-4.69] 

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.971, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.02, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.919, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.951, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.14, policy_loss=-4.73] 

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=0.841, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.34, policy_loss=-4.68] 

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 321/5000 [01:09<15:48,  4.93it/s, critic_loss=1.14, policy_loss=-4.64]

cri
pol


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.23, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.25, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=0.932, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.25, policy_loss=-4.64] 

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.11, policy_loss=-4.63]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=0.946, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.11, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.49, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.32, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.36, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.05, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.18, policy_loss=-4.6]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 322/5000 [01:09<15:29,  5.04it/s, critic_loss=1.06, policy_loss=-4.75]

cri
pol


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.35, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.22, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.32, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.11, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.15, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.24, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.71, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=0.975, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.42, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 323/5000 [01:10<15:40,  4.97it/s, critic_loss=1.14, policy_loss=-4.71]

cri
pol


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.41, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.42, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.32, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=0.942, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.15, policy_loss=-4.74] 

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.39, policy_loss=-4.61]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.16, policy_loss=-4.62]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=1.17, policy_loss=-4.73]

cri


Epoch 1/10:   6%|▋         | 324/5000 [01:10<15:27,  5.04it/s, critic_loss=0.968, policy_loss=-4.77]

cri
pol


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.21, policy_loss=-4.75] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.3, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.2, policy_loss=-4.6] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.34, policy_loss=-4.58]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.4, policy_loss=-4.6]  

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.08, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.26, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.4, policy_loss=-4.71] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.27, policy_loss=-4.71]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.16, policy_loss=-4.7] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.6, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.39, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=0.881, policy_loss=-4.65]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.13, policy_loss=-4.67] 

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   6%|▋         | 325/5000 [01:10<15:38,  4.98it/s, critic_loss=1.26, policy_loss=-4.68]

cri
pol


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.25, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.15, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=0.953, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.49, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.28, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=0.898, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 326/5000 [01:10<15:48,  4.93it/s, critic_loss=1.04, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.28, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=0.912, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.39, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.48, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.23, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.44, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.12, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 327/5000 [01:10<15:38,  4.98it/s, critic_loss=0.917, policy_loss=-4.64]

cri
pol


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.03, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.1, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=0.915, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=0.894, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.01, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=0.941, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.21, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.11, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.05, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.22, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.3, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=0.861, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 328/5000 [01:11<15:42,  4.96it/s, critic_loss=1.44, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.31, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.844, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.816, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.829, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.935, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.97, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.56, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.882, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.922, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=0.935, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.29, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 329/5000 [01:11<15:34,  5.00it/s, critic_loss=1.19, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.957, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.9, policy_loss=-4.67]  

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.927, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.816, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.824, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.93, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.12, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.23, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.859, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.903, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.02, policy_loss=-4.75] 

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.12, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.837, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 330/5000 [01:11<15:32,  5.01it/s, critic_loss=0.737, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.91, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.994, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.947, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.807, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.976, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.729, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.867, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.949, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.983, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.884, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.924, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=1.06, policy_loss=-4.76] 

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.802, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.985, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.989, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=2.3, policy_loss=-4.68]  

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.843, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.922, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 331/5000 [01:11<15:27,  5.03it/s, critic_loss=0.919, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.936, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.749, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.978, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.973, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.963, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.976, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.884, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.859, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=1.04, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.925, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.8, policy_loss=-4.71]  

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=1, policy_loss=-4.66]  

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.711, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.963, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.788, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.923, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=1.52, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 332/5000 [01:11<14:54,  5.22it/s, critic_loss=0.936, policy_loss=-4.73]

cri
pol


Epoch 1/10:   7%|▋         | 333/5000 [01:11<14:56,  5.21it/s, critic_loss=0.898, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:11<14:56,  5.21it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:11<14:56,  5.21it/s, critic_loss=0.795, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.804, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.01, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.21, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.964, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.07, policy_loss=-4.76] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.878, policy_loss=-4.78]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.995, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.07, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.901, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.03, policy_loss=-4.62] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.27, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.943, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.82, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 333/5000 [01:12<14:56,  5.21it/s, critic_loss=0.845, policy_loss=-4.64]

cri
pol


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.21, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.908, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.17, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.753, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.09, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.858, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.48, policy_loss=-4.78] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.878, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.682, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.704, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.789, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.41, policy_loss=-4.62] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.996, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.26, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.07, policy_loss=-4.78]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.06, policy_loss=-4.79]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.31, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.678, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 334/5000 [01:12<15:09,  5.13it/s, critic_loss=0.948, policy_loss=-4.61]

cri
pol


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.06, policy_loss=-4.6]  

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.798, policy_loss=-4.59]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.936, policy_loss=-4.6] 

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.912, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.949, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.784, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1, policy_loss=-4.76]    

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.4, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.25, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.22, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.881, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.08, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.801, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.09, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.824, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.823, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 335/5000 [01:12<15:12,  5.11it/s, critic_loss=0.896, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.754, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.725, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.01, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.1, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.699, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.844, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.812, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.927, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.35, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.798, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.05, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.82, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.856, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.953, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.831, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.821, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=1.04, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.909, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 336/5000 [01:12<14:47,  5.25it/s, critic_loss=0.766, policy_loss=-4.74]

cri
pol


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.738, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.962, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.14, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.814, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.25, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.13, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.874, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.2, policy_loss=-4.67]  

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.21, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.883, policy_loss=-4.58]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.994, policy_loss=-4.55]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.2, policy_loss=-4.58]  

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.868, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.979, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.876, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=0.952, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 337/5000 [01:12<14:54,  5.21it/s, critic_loss=1.24, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   7%|▋         | 338/5000 [01:12<14:58,  5.19it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:12<14:58,  5.19it/s, critic_loss=0.957, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:12<14:58,  5.19it/s, critic_loss=1.22, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:12<14:58,  5.19it/s, critic_loss=1.09, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:12<14:58,  5.19it/s, critic_loss=0.835, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.18, policy_loss=-4.75] 

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.16, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.09, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=0.921, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.02, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=0.985, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=0.958, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.03, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 338/5000 [01:13<14:58,  5.19it/s, critic_loss=1.08, policy_loss=-4.65]

cri
pol


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.3, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.878, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.957, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.805, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.04, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.982, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.57, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.15, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.08, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.889, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.837, policy_loss=-4.59]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=0.872, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.28, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 339/5000 [01:13<15:54,  4.88it/s, critic_loss=1.04, policy_loss=-4.73]

cri
pol


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1, policy_loss=-4.77]   

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1.41, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.966, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1.31, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1.48, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1.24, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.972, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.992, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.836, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.818, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.975, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.934, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1, policy_loss=-4.7]     

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.979, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.831, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.863, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.891, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.911, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 340/5000 [01:13<15:52,  4.89it/s, critic_loss=0.76, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.18, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=0.91, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.13, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.66, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=0.804, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.24, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.1, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=0.808, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=0.895, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.41, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=0.967, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.04, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 341/5000 [01:13<15:38,  4.96it/s, critic_loss=1.02, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1, policy_loss=-4.72]  

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.29, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=0.963, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.28, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.22, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=0.98, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.13, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.22, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=0.901, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.08, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.18, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.01, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.02, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=0.983, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=0.929, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.17, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 342/5000 [01:13<15:36,  4.97it/s, critic_loss=1.09, policy_loss=-4.69]

cri
pol


Epoch 1/10:   7%|▋         | 343/5000 [01:13<16:12,  4.79it/s, critic_loss=0.867, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.888, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.897, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.21, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.987, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.01, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.21, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.962, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.17, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.986, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.28, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.12, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=0.791, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.15, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.12, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 343/5000 [01:14<16:12,  4.79it/s, critic_loss=1.18, policy_loss=-4.62]

cri
pol


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=0.861, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.13, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.08, policy_loss=-4.78]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.04, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=0.926, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.38, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.2, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.14, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.28, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.27, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.2, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.29, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.29, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=1.15, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=0.844, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=0.983, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 344/5000 [01:14<15:42,  4.94it/s, critic_loss=0.874, policy_loss=-4.64]

cri
pol


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.3, policy_loss=-4.65]  

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.1, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.1, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.33, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.2, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.45, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.15, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.06, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.11, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.24, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.37, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=0.93, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 345/5000 [01:14<15:35,  4.97it/s, critic_loss=1.07, policy_loss=-4.66]

cri
pol


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.33, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.19, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=0.973, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.28, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.14, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.23, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.49, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.1, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.55, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 346/5000 [01:14<15:35,  4.97it/s, critic_loss=1.05, policy_loss=-4.75]

cri
pol


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.27, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=0.878, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.22, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=0.818, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.1, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=0.862, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.2, policy_loss=-4.76] 

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.45, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.4, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.51, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=0.934, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.2, policy_loss=-4.72]  

cri


Epoch 1/10:   7%|▋         | 347/5000 [01:14<15:17,  5.07it/s, critic_loss=1.61, policy_loss=-4.76]

cri
pol


Epoch 1/10:   7%|▋         | 348/5000 [01:14<15:15,  5.08it/s, critic_loss=1.03, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:14<15:15,  5.08it/s, critic_loss=0.898, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:14<15:15,  5.08it/s, critic_loss=1.19, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:14<15:15,  5.08it/s, critic_loss=0.846, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.13, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.29, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.24, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.25, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=0.948, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.02, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=0.879, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=0.899, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.36, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 348/5000 [01:15<15:15,  5.08it/s, critic_loss=0.861, policy_loss=-4.68]

cri
pol


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.998, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.18, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.828, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.974, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.964, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.966, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.08, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.999, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.913, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1, policy_loss=-4.67]    

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.21, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.26, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.981, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.949, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.991, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.05, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=0.955, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 349/5000 [01:15<15:19,  5.06it/s, critic_loss=1.01, policy_loss=-4.64] 

cri
pol


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.951, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.876, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.985, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.11, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.18, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.901, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.885, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.92, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.903, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.18, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.9, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.986, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.1, policy_loss=-4.69]  

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=1.06, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.933, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.983, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.967, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 350/5000 [01:15<15:24,  5.03it/s, critic_loss=0.941, policy_loss=-4.71]

cri
pol


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.04, policy_loss=-4.75] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.3, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.07, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.842, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.899, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.846, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.861, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.981, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.06, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.943, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.999, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.23, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.961, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=0.858, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 351/5000 [01:15<15:13,  5.09it/s, critic_loss=1.41, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.69, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.946, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.03, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.999, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1, policy_loss=-4.67]    

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.24, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.974, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.784, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.869, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.09, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.22, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.928, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.48, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.22, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=0.937, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.22, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.28, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 352/5000 [01:15<15:19,  5.05it/s, critic_loss=1.04, policy_loss=-4.74]

cri
pol


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=1.29, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=0.817, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=0.791, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=1.06, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=1.31, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:15<15:00,  5.16it/s, critic_loss=0.786, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.91, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.963, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.08, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.08, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.17, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.864, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.891, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.11, policy_loss=-4.6]  

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.16, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.959, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.873, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.961, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 353/5000 [01:16<15:00,  5.16it/s, critic_loss=0.922, policy_loss=-4.72]

cri
pol


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.803, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.843, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.07, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.919, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.837, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.852, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.982, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.848, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.37, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.905, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.2, policy_loss=-4.73]  

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1, policy_loss=-4.73]  

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.816, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.812, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.934, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=1.03, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 354/5000 [01:16<15:10,  5.10it/s, critic_loss=0.923, policy_loss=-4.66]

cri
pol


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.849, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.837, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.941, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.86, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.911, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.925, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.832, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.926, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.837, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.98, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.877, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.95, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=1, policy_loss=-4.66]  

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.957, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.979, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.845, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 355/5000 [01:16<14:42,  5.26it/s, critic_loss=0.951, policy_loss=-4.72]

cri
pol


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.931, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.986, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.807, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.862, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=1.29, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.998, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.763, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.728, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.754, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.872, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.76, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.872, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=1, policy_loss=-4.72]    

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.873, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.873, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.839, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.944, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=0.78, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 356/5000 [01:16<14:47,  5.23it/s, critic_loss=1.11, policy_loss=-4.69]

cri
pol


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.942, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.925, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.902, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=1.08, policy_loss=-4.78] 

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.817, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.809, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.808, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.917, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.932, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.783, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.924, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.972, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.959, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.88, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=1.63, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=1.05, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.987, policy_loss=-4.81]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.869, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.666, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 357/5000 [01:16<14:43,  5.26it/s, critic_loss=0.739, policy_loss=-4.64]

cri
pol


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.893, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.839, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=1.11, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.839, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.823, policy_loss=-4.79]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.904, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=1.07, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.859, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.713, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=0.868, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:16<14:51,  5.21it/s, critic_loss=1.69, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=0.841, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=0.9, policy_loss=-4.78]  

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=1.06, policy_loss=-4.78]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=0.895, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=1.03, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=0.786, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 358/5000 [01:17<14:51,  5.21it/s, critic_loss=0.726, policy_loss=-4.63]

cri
pol


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.699, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1.63, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1.05, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.789, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.899, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.906, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.724, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.781, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.747, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.909, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.794, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=1.01, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.724, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.911, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.786, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.677, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 359/5000 [01:17<14:32,  5.32it/s, critic_loss=0.646, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.999, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.974, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.948, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=1.42, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.818, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.872, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.933, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.981, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.924, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.999, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.944, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.947, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=1.09, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.89, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=1.19, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.922, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.96, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 360/5000 [01:17<14:54,  5.18it/s, critic_loss=0.677, policy_loss=-4.69]

cri
pol


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.876, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.937, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.937, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.91, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.943, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=1.33, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.884, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=1.21, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.792, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=1.1, policy_loss=-4.69]  

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.807, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.704, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=1.08, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.839, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.924, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.893, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.827, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.898, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 361/5000 [01:17<15:49,  4.89it/s, critic_loss=0.927, policy_loss=-4.71]

cri
pol


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.912, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.03, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.776, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.44, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1, policy_loss=-4.7]    

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.843, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.1, policy_loss=-4.74]  

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.974, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.797, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.25, policy_loss=-4.61] 

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.22, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.868, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.997, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.03, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.11, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=1.29, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.746, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 362/5000 [01:17<16:06,  4.80it/s, critic_loss=0.828, policy_loss=-4.63]

cri
pol


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.919, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.995, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.902, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.774, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.943, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:17<16:16,  4.75it/s, critic_loss=0.798, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.91, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.81, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.846, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.855, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.01, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.991, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.815, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 363/5000 [01:18<16:16,  4.75it/s, critic_loss=0.719, policy_loss=-4.65]

cri
pol


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.786, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.42, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.49, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.21, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.841, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.892, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.986, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.894, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.963, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.76, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.09, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.17, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.39, policy_loss=-4.79]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.03, policy_loss=-4.75]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=1.33, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 364/5000 [01:18<15:57,  4.84it/s, critic_loss=0.93, policy_loss=-4.64]

cri
pol


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.726, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.798, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.793, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.19, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.714, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.36, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.855, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.883, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.14, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.26, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.79, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.902, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.815, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.06, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=0.861, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 365/5000 [01:18<15:19,  5.04it/s, critic_loss=1.13, policy_loss=-4.74] 

cri
pol


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.978, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.953, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.3, policy_loss=-4.68]  

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.931, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.973, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.04, policy_loss=-4.74] 

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.09, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.18, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.8, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.866, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.11, policy_loss=-4.64] 

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.18, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.94, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=1.16, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.985, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.991, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.783, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 366/5000 [01:18<14:55,  5.18it/s, critic_loss=0.99, policy_loss=-4.65]

cri
pol


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.977, policy_loss=-4.57]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.29, policy_loss=-4.59] 

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.26, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.02, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.05, policy_loss=-4.78]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.912, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.962, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.825, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.07, policy_loss=-4.63] 

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.1, policy_loss=-4.61] 

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.08, policy_loss=-4.59]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.899, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.15, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.842, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=1.3, policy_loss=-4.7]   

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.952, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.882, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 367/5000 [01:18<14:35,  5.29it/s, critic_loss=0.987, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.939, policy_loss=-4.61]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.841, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=1.09, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=1.07, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.788, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=1.48, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.97, policy_loss=-4.77]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.804, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.978, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.804, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.773, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=0.977, policy_loss=-4.6] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:18<14:40,  5.26it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=0.702, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=0.918, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=0.97, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=0.927, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=1.17, policy_loss=-4.73] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 368/5000 [01:19<14:40,  5.26it/s, critic_loss=0.827, policy_loss=-4.65]

cri
pol


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.929, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=1.03, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.97, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.73, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.918, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=1.1, policy_loss=-4.71]  

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.679, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.85, policy_loss=-4.65] 

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.867, policy_loss=-4.62]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.909, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.773, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.967, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.904, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.806, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.941, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.918, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=1.4, policy_loss=-4.71]  

cri


Epoch 1/10:   7%|▋         | 369/5000 [01:19<14:55,  5.17it/s, critic_loss=0.863, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.768, policy_loss=-4.63]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.976, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=1.06, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.741, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=1.24, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.872, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.956, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.866, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.647, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.706, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.877, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.751, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.919, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.853, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.878, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.845, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=0.88, policy_loss=-4.66] 

cri


Epoch 1/10:   7%|▋         | 370/5000 [01:19<14:42,  5.25it/s, critic_loss=1.04, policy_loss=-4.67]

cri
pol


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.786, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.935, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.847, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.911, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1, policy_loss=-4.64]    

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.839, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.944, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.82, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.973, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.881, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.981, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.839, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.805, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.702, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1.17, policy_loss=-4.72] 

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=0.983, policy_loss=-4.76]

cri


Epoch 1/10:   7%|▋         | 371/5000 [01:19<15:32,  4.96it/s, critic_loss=1.13, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.832, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.838, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.941, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.864, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.715, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.856, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.922, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.624, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.797, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.956, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.958, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.819, policy_loss=-4.65]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.878, policy_loss=-4.64]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=1.05, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.981, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.951, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.671, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.794, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.908, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 372/5000 [01:19<15:36,  4.94it/s, critic_loss=0.935, policy_loss=-4.71]

cri
pol


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=1.07, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=0.891, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=0.887, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=0.794, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=0.816, policy_loss=-4.74]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:19<16:29,  4.67it/s, critic_loss=0.9, policy_loss=-4.71]  

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.961, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.89, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.929, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.958, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.767, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.853, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.894, policy_loss=-4.73]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.746, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=1.05, policy_loss=-4.71] 

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   7%|▋         | 373/5000 [01:20<16:29,  4.67it/s, critic_loss=0.999, policy_loss=-4.69]

cri
pol


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.884, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.784, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.793, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=1.01, policy_loss=-4.67] 

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.816, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.891, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.782, policy_loss=-4.71]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.979, policy_loss=-4.7] 

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.999, policy_loss=-4.7]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.821, policy_loss=-4.69]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.752, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.889, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.951, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.782, policy_loss=-4.66]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.762, policy_loss=-4.67]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.862, policy_loss=-4.68]

cri


Epoch 1/10:   7%|▋         | 374/5000 [01:20<16:27,  4.69it/s, critic_loss=0.751, policy_loss=-4.71]

cri
pol


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.788, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.868, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.985, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.888, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.969, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.858, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.791, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.794, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.998, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.855, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.86, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.755, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.798, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=1.11, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.816, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.824, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.939, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=0.887, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 375/5000 [01:20<16:08,  4.78it/s, critic_loss=1.1, policy_loss=-4.69]  

cri
pol


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.912, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.77, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.836, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.73, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.815, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.844, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.871, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.979, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.914, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.926, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.869, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.9, policy_loss=-4.72]  

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.718, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.907, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=1.31, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.915, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.968, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=0.802, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 376/5000 [01:20<15:28,  4.98it/s, critic_loss=1.02, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.989, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.969, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.16, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.816, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.06, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.28, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.775, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.968, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.02, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.589, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.14, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.795, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.803, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.04, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.783, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.02, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=1.25, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.769, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 377/5000 [01:20<15:46,  4.88it/s, critic_loss=0.821, policy_loss=-4.67]

cri
pol


Epoch 1/10:   8%|▊         | 378/5000 [01:20<15:59,  4.82it/s, critic_loss=0.884, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:20<15:59,  4.82it/s, critic_loss=1.09, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:20<15:59,  4.82it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:20<15:59,  4.82it/s, critic_loss=0.816, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:20<15:59,  4.82it/s, critic_loss=0.861, policy_loss=-4.78]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.16, policy_loss=-4.78] 

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.01, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.643, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.15, policy_loss=-4.62] 

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.667, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.837, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.83, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=1.41, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.868, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.869, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.996, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.814, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 378/5000 [01:21<15:59,  4.82it/s, critic_loss=0.985, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.05, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.878, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.717, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.723, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.814, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1, policy_loss=-4.67]    

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.927, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.941, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.685, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.775, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.01, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.934, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.961, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.852, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=0.969, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.16, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.31, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.05, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 379/5000 [01:21<15:46,  4.88it/s, critic_loss=1.04, policy_loss=-4.65]

cri
pol


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.11, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.758, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.911, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.978, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.836, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1, policy_loss=-4.67]    

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.16, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.64, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.879, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.984, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.806, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.954, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.838, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.907, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.86, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 380/5000 [01:21<15:21,  5.01it/s, critic_loss=0.876, policy_loss=-4.71]

cri
pol


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.991, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.17, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1, policy_loss=-4.75]   

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.08, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.19, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.755, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.13, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1, policy_loss=-4.72]   

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.831, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.954, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.93, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=1.6, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 381/5000 [01:21<15:24,  4.99it/s, critic_loss=0.847, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.982, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.05, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.996, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.756, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.971, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.946, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.865, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.873, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.981, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.942, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.837, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.985, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.06, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=1.49, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 382/5000 [01:21<15:24,  5.00it/s, critic_loss=0.95, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=0.922, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=1.11, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=0.988, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=1.17, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=1.57, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:21<15:15,  5.04it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.09, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.26, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.11, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=0.783, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.13, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.25, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 383/5000 [01:22<15:15,  5.04it/s, critic_loss=1.05, policy_loss=-4.59]

cri
pol


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.23, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.75, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.67, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.02, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.921, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.921, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.922, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.995, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.22, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.966, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.13, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.927, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.36, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.782, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.03, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=0.973, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 384/5000 [01:22<15:20,  5.01it/s, critic_loss=1.25, policy_loss=-4.69] 

cri
pol


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.914, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.08, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1, policy_loss=-4.64]   

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.1, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.948, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.14, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1, policy_loss=-4.67]   

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.31, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.954, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.794, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.25, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.933, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=1.08, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 385/5000 [01:22<15:03,  5.11it/s, critic_loss=0.92, policy_loss=-4.62]

cri
pol


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=0.932, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.22, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.35, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.15, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.12, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.33, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.18, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.08, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.09, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=0.955, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.47, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=0.871, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.03, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 386/5000 [01:22<14:59,  5.13it/s, critic_loss=1.19, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.954, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.16, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.17, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.949, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.19, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1, policy_loss=-4.65]  

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.05, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.704, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.23, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.975, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.823, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.43, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.26, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=0.998, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 387/5000 [01:22<14:53,  5.16it/s, critic_loss=1.11, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=0.923, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=1.11, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=0.974, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=1.05, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=0.99, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=0.875, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=1.09, policy_loss=-4.62] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=0.785, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:22<15:02,  5.11it/s, critic_loss=1.05, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=0.969, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=1.42, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=0.847, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=1, policy_loss=-4.7]     

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=0.866, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=0.999, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=1.37, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 388/5000 [01:23<15:02,  5.11it/s, critic_loss=0.887, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.942, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.928, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.912, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.933, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.875, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.92, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.15, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.993, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.753, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.11, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.833, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.942, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.01, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=1.4, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 389/5000 [01:23<14:55,  5.15it/s, critic_loss=0.971, policy_loss=-4.7]

cri
pol


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.33, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.933, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.17, policy_loss=-4.64] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.898, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.07, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.797, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.993, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.844, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.779, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.965, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.89, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.906, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.988, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.939, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.835, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=0.867, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 390/5000 [01:23<15:13,  5.05it/s, critic_loss=1.08, policy_loss=-4.73] 

cri
pol


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.888, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.05, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.926, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.07, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.947, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.75, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.984, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.815, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.964, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.778, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.42, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.43, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.05, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.993, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=0.785, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.26, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.07, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 391/5000 [01:23<15:07,  5.08it/s, critic_loss=1.11, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.946, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.922, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.892, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.05, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.22, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.878, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.993, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.851, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.876, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.05, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.05, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.866, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.892, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.971, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.906, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.25, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=1.1, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.898, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 392/5000 [01:23<14:43,  5.21it/s, critic_loss=0.939, policy_loss=-4.67]

cri
pol


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.988, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.799, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.815, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.89, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.05, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.08, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.23, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.952, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.968, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=0.891, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.14, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:23<14:46,  5.19it/s, critic_loss=1.11, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=0.9, policy_loss=-4.76] 

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=0.985, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=0.96, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 393/5000 [01:24<14:46,  5.19it/s, critic_loss=0.872, policy_loss=-4.65]

cri
pol


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=1.04, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.92, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.863, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.775, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.688, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.868, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.909, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.902, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.955, policy_loss=-4.6] 

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=1.16, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.987, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.962, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=1.35, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=1.17, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.776, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.892, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=1.42, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.963, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.821, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 394/5000 [01:24<14:39,  5.24it/s, critic_loss=0.824, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.896, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.868, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.701, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.714, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=1.09, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.812, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.767, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.885, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.928, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.695, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.784, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.634, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.718, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.627, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.614, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.874, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.649, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.801, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.691, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 395/5000 [01:24<14:18,  5.36it/s, critic_loss=0.9, policy_loss=-4.69]  

cri
pol


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.808, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.962, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.834, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.844, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.885, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.978, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.853, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.927, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.994, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.881, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.949, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.994, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.679, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=1.05, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.88, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.8, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 396/5000 [01:24<14:02,  5.46it/s, critic_loss=0.926, policy_loss=-4.74]

cri
pol


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.848, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.906, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.751, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.888, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.708, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.848, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.778, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.726, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.74, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=1.15, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.91, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.74, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.724, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.893, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.85, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.688, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.902, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 397/5000 [01:24<14:41,  5.22it/s, critic_loss=0.762, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.783, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.738, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.03, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.03, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.13, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.924, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.24, policy_loss=-4.78]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.11, policy_loss=-4.79]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.963, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.1, policy_loss=-4.72]  

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.957, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.15, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.911, policy_loss=-4.61]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.14, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.88, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.1, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=0.89, policy_loss=-4.78]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:24<14:56,  5.13it/s, critic_loss=1.02, policy_loss=-4.78]

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:25<14:56,  5.13it/s, critic_loss=0.83, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 398/5000 [01:25<14:56,  5.13it/s, critic_loss=1.11, policy_loss=-4.7]

cri
pol


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.865, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.09, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.963, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.879, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.878, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.04, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.853, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.07, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.844, policy_loss=-4.61]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.845, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.704, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.03, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.914, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.887, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.784, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.777, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.896, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=0.768, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 399/5000 [01:25<15:06,  5.07it/s, critic_loss=1.12, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.913, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.833, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.758, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.869, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.936, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.803, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.75, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.881, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.767, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.75, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.844, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.895, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.732, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.688, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.775, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.786, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.953, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 400/5000 [01:25<15:21,  4.99it/s, critic_loss=0.616, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.05, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.982, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.841, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.01, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.12, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.871, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.961, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.01, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.01, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.44, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.889, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.02, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=1.32, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.858, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.885, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 401/5000 [01:25<15:19,  5.00it/s, critic_loss=0.809, policy_loss=-4.71]

cri
pol


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.959, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.868, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.09, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.46, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.921, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.854, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.11, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.845, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.722, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.19, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.695, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.885, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.741, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.837, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.834, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=0.991, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.51, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 402/5000 [01:25<15:12,  5.04it/s, critic_loss=1.75, policy_loss=-4.72]

cri
pol


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.714, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.823, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.853, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.841, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1, policy_loss=-4.66]    

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.919, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1.14, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.89, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.82, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1.05, policy_loss=-4.77]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.872, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=0.909, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:25<15:14,  5.03it/s, critic_loss=1, policy_loss=-4.68]    

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:26<15:14,  5.03it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:26<15:14,  5.03it/s, critic_loss=1.29, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:26<15:14,  5.03it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:26<15:14,  5.03it/s, critic_loss=0.823, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 403/5000 [01:26<15:14,  5.03it/s, critic_loss=0.916, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.17, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.34, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.97, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.937, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.02, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.923, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.79, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.736, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.871, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.892, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.981, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.938, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.01, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.762, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.03, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=0.881, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 404/5000 [01:26<16:11,  4.73it/s, critic_loss=1.02, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.918, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.25, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.733, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.934, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.2, policy_loss=-4.69]  

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.14, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.896, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.01, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.3, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.844, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.857, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.05, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.728, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.826, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 405/5000 [01:26<16:06,  4.75it/s, critic_loss=0.776, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.754, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.799, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.766, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.856, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=1.01, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.97, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.78, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.95, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.817, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.947, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.694, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.766, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=1.36, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.782, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.874, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=1.28, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.932, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 406/5000 [01:26<16:36,  4.61it/s, critic_loss=0.959, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.796, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.967, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=1.47, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.938, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=1.03, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.779, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.834, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.998, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.839, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.849, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.953, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.984, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=1.28, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.877, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.904, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.914, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.69, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 407/5000 [01:26<15:49,  4.84it/s, critic_loss=0.985, policy_loss=-4.67]

cri
pol


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.955, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.743, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.816, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.779, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.02, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.801, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.842, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.863, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.05, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.1, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=0.941, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.02, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1.22, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:26<15:11,  5.04it/s, critic_loss=1, policy_loss=-4.73]   

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:27<15:11,  5.04it/s, critic_loss=1.07, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:27<15:11,  5.04it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:27<15:11,  5.04it/s, critic_loss=0.941, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:27<15:11,  5.04it/s, critic_loss=0.957, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 408/5000 [01:27<15:11,  5.04it/s, critic_loss=0.904, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.951, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.07, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.842, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.53, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.887, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.943, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.16, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.986, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.973, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.79, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.68, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.53, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.9, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.986, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.808, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=0.965, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 409/5000 [01:27<14:40,  5.22it/s, critic_loss=1.18, policy_loss=-4.64] 

cri
pol


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.842, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.24, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.887, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.953, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.01, policy_loss=-4.78] 

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.981, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.936, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.56, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.981, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.908, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.1, policy_loss=-4.76]  

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.913, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=1.19, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.894, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.911, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 410/5000 [01:27<14:32,  5.26it/s, critic_loss=0.801, policy_loss=-4.63]

cri
pol


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.14, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.921, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.12, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.07, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.823, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.12, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.99, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.977, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.881, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.2, policy_loss=-4.64]  

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.817, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.24, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.02, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.767, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.883, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=2.09, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=0.884, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.28, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 411/5000 [01:27<15:21,  4.98it/s, critic_loss=1.41, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.913, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.828, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.964, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.23, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.923, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1, policy_loss=-4.73]    

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.17, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.968, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.771, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.19, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.914, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.19, policy_loss=-4.75] 

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.998, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=0.966, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 412/5000 [01:27<15:14,  5.02it/s, critic_loss=1.19, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.887, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.849, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.897, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.39, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.08, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.05, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.32, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.919, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.902, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.912, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.13, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.811, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.03, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=0.965, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:27<14:57,  5.11it/s, critic_loss=1.13, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:28<14:57,  5.11it/s, critic_loss=1.08, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:28<14:57,  5.11it/s, critic_loss=0.983, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:28<14:57,  5.11it/s, critic_loss=0.767, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 413/5000 [01:28<14:57,  5.11it/s, critic_loss=1.13, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.883, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.889, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.892, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=1.04, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.95, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.925, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.814, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.806, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.867, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.878, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.995, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=1.04, policy_loss=-4.66] 

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.88, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.737, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.874, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=1.14, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.893, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=1.05, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=0.946, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 414/5000 [01:28<15:08,  5.05it/s, critic_loss=1.16, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.855, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.923, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.866, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.786, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.921, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.988, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.708, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=1.6, policy_loss=-4.7]   

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.941, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.999, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=1.4, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.849, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=1.15, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.853, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.856, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.857, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.797, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.982, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 415/5000 [01:28<14:57,  5.11it/s, critic_loss=0.752, policy_loss=-4.69]

cri
pol


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.901, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.94, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.77, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.928, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.749, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.999, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.766, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.969, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=1.19, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.967, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.806, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.914, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.913, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.958, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.862, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.852, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=0.794, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 416/5000 [01:28<15:27,  4.94it/s, critic_loss=1.23, policy_loss=-4.68] 

cri
pol


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.822, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.947, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.871, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.799, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.02, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.87, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.803, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.13, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.13, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.03, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.764, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=2.62, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1, policy_loss=-4.65]   

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.2, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.87, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.948, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=1.09, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 417/5000 [01:28<15:42,  4.86it/s, critic_loss=0.921, policy_loss=-4.72]

cri
pol


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.829, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.779, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.862, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.943, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.863, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.842, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.795, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.836, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.87, policy_loss=-4.74] 

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.64, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.927, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.806, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.92, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.64, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.954, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=1.15, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:28<14:57,  5.11it/s, critic_loss=0.936, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:29<14:57,  5.11it/s, critic_loss=0.919, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:29<14:57,  5.11it/s, critic_loss=0.901, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 418/5000 [01:29<14:57,  5.11it/s, critic_loss=0.809, policy_loss=-4.68]

cri
pol


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.806, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.789, policy_loss=-4.61]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=1.03, policy_loss=-4.65] 

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.863, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.972, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.791, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.728, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.878, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.697, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.938, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.701, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=1.86, policy_loss=-4.64] 

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=1.04, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.732, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.76, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=1.08, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.731, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.798, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.916, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 419/5000 [01:29<15:12,  5.02it/s, critic_loss=0.823, policy_loss=-4.66]

cri
pol


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.886, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.913, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.844, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.83, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.763, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.745, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=1.28, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.718, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.718, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=1.23, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.914, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.639, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.837, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.686, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=1.1, policy_loss=-4.73] 

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.972, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.825, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.756, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 420/5000 [01:29<15:32,  4.91it/s, critic_loss=0.776, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.39, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.826, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.775, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.896, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.761, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.09, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.856, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.72, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.785, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.18, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.791, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.9, policy_loss=-4.72]  

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.855, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.996, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.819, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.803, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.16, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   8%|▊         | 421/5000 [01:29<15:31,  4.91it/s, critic_loss=0.738, policy_loss=-4.72]

cri
pol


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.92, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.788, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=1.27, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.671, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.889, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.834, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.861, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=1.24, policy_loss=-4.7]  

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.775, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.844, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.879, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.725, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.746, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.838, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.904, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=1, policy_loss=-4.68]    

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.842, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.749, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.968, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 422/5000 [01:29<15:00,  5.08it/s, critic_loss=0.866, policy_loss=-4.67]

cri
pol


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.863, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.921, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.885, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.859, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.822, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.777, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.718, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.994, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.896, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=1.42, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.783, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.947, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.817, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.925, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.789, policy_loss=-4.76]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=0.76, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=1.08, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:29<14:37,  5.22it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:30<14:37,  5.22it/s, critic_loss=0.925, policy_loss=-4.62]

cri


Epoch 1/10:   8%|▊         | 423/5000 [01:30<14:37,  5.22it/s, critic_loss=0.841, policy_loss=-4.65]

cri
pol


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.877, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.907, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.13, policy_loss=-4.71] 

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.01, policy_loss=-4.75]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.727, policy_loss=-4.71]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.2, policy_loss=-4.7]   

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.804, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.869, policy_loss=-4.67]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.25, policy_loss=-4.67] 

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1, policy_loss=-4.69]   

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.891, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.906, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.03, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.811, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.827, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=2, policy_loss=-4.67]    

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.786, policy_loss=-4.63]

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=1.51, policy_loss=-4.63] 

cri


Epoch 1/10:   8%|▊         | 424/5000 [01:30<14:54,  5.12it/s, critic_loss=0.824, policy_loss=-4.7]

cri
pol


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.44, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.44, policy_loss=-4.74]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.805, policy_loss=-4.72]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.77, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.795, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.26, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.2, policy_loss=-4.72] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.689, policy_loss=-4.7]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.847, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.78, policy_loss=-4.66]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.972, policy_loss=-4.65]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.67, policy_loss=-4.69] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=0.891, policy_loss=-4.69]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.37, policy_loss=-4.68] 

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.01, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   8%|▊         | 425/5000 [01:30<14:49,  5.14it/s, critic_loss=1.03, policy_loss=-4.73]

cri
pol


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.938, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.812, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.09, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.89, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.971, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.22, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.989, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.32, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.965, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.983, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.95, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.8, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.33, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.42, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.779, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.12, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.24, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=0.832, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 426/5000 [01:30<14:41,  5.19it/s, critic_loss=1.28, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.934, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.899, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.15, policy_loss=-4.63] 

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.858, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.39, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.844, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.39, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.932, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.989, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.892, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.953, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.849, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.09, policy_loss=-4.63] 

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.02, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.947, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.971, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=0.805, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.14, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▊         | 427/5000 [01:30<14:33,  5.23it/s, critic_loss=1.49, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.2, policy_loss=-4.62] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.969, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.764, policy_loss=-4.61]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.18, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.841, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.844, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.989, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.858, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.984, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.985, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.796, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.963, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.13, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.994, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.76, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.27, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 428/5000 [01:30<14:08,  5.39it/s, critic_loss=0.859, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▊         | 429/5000 [01:30<14:17,  5.33it/s, critic_loss=0.89, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:30<14:17,  5.33it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:30<14:17,  5.33it/s, critic_loss=0.927, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.937, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.832, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=1.22, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.921, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.923, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.964, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.828, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.83, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.802, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=1.05, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.939, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=1, policy_loss=-4.7]    

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.808, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.832, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 429/5000 [01:31<14:17,  5.33it/s, critic_loss=0.823, policy_loss=-4.66]

cri
pol


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.955, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.73, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.862, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.908, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.833, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.863, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.973, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.821, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.861, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.824, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=1.2, policy_loss=-4.71]  

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.79, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.976, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=1.23, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.979, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.864, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.861, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.918, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 430/5000 [01:31<14:27,  5.27it/s, critic_loss=0.912, policy_loss=-4.66]

cri
pol


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.905, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.15, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.1, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.745, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.08, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.851, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.804, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.901, policy_loss=-4.61]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.15, policy_loss=-4.64] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=2.02, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.735, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.03, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.06, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.83, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.841, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.882, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.17, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=0.786, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 431/5000 [01:31<15:11,  5.01it/s, critic_loss=1.25, policy_loss=-4.69] 

cri
pol


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.83, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.9, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.818, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.755, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.899, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.708, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.925, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.78, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.842, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.717, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.877, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.782, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.912, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=0.834, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.25, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 432/5000 [01:31<15:38,  4.87it/s, critic_loss=1.59, policy_loss=-4.71]

cri
pol


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.773, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1, policy_loss=-4.72]    

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.989, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.25, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.817, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.05, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.43, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.99, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.934, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.885, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.835, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.36, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=1.2, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.846, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.932, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 433/5000 [01:31<15:18,  4.97it/s, critic_loss=0.887, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▊         | 434/5000 [01:31<15:08,  5.03it/s, critic_loss=1.28, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:31<15:08,  5.03it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.07, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.33, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.96, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.893, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.09, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.872, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.17, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.991, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.39, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.802, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 434/5000 [01:32<15:08,  5.03it/s, critic_loss=0.898, policy_loss=-4.66]

cri
pol


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.897, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.04, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.953, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.29, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.988, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.03, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.882, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.63, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.871, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=0.982, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.19, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.01, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.21, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 435/5000 [01:32<15:42,  4.84it/s, critic_loss=1.2, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.01, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.995, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.892, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.947, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.979, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.12, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.938, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.2, policy_loss=-4.65]  

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.824, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.782, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.13, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=0.96, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 436/5000 [01:32<15:39,  4.86it/s, critic_loss=1.2, policy_loss=-4.73] 

cri
pol


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.03, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.26, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.828, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.16, policy_loss=-4.61] 

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.723, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.845, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.11, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.894, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.01, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.18, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.935, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.5, policy_loss=-4.68]  

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.97, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=1, policy_loss=-4.74]  

cri


Epoch 1/10:   9%|▊         | 437/5000 [01:32<15:57,  4.77it/s, critic_loss=0.957, policy_loss=-4.73]

cri
pol


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.01, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=0.807, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.77, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.15, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=0.802, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=0.948, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.04, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1, policy_loss=-4.66]   

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.42, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=0.998, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:32<15:47,  4.81it/s, critic_loss=1.19, policy_loss=-4.64] 

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:33<15:47,  4.81it/s, critic_loss=0.91, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:33<15:47,  4.81it/s, critic_loss=0.935, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:33<15:47,  4.81it/s, critic_loss=1.21, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:33<15:47,  4.81it/s, critic_loss=0.973, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 438/5000 [01:33<15:47,  4.81it/s, critic_loss=1, policy_loss=-4.71]    

cri
pol


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.06, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.839, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.867, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.24, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.722, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.896, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.921, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.93, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.02, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.08, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.893, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.961, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.23, policy_loss=-4.64] 

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.17, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.14, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.779, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.968, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=1.56, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▉         | 439/5000 [01:33<15:57,  4.76it/s, critic_loss=0.743, policy_loss=-4.73]

cri
pol


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.05, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.855, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.813, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.26, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.93, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.09, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.36, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.972, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.962, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.23, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.899, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.923, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.05, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.834, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.807, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=1.31, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 440/5000 [01:33<16:07,  4.72it/s, critic_loss=0.803, policy_loss=-4.71]

cri
pol


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.868, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.66, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.02, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.888, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.1, policy_loss=-4.7]   

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.958, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.878, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.966, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.97, policy_loss=-4.63] 

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.94, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.966, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.896, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.934, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.18, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 441/5000 [01:33<16:19,  4.66it/s, critic_loss=0.97, policy_loss=-4.64]

cri
pol


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.789, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.06, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.951, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.876, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.19, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.828, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.899, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.12, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.878, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.05, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.825, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.34, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.835, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.11, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.15, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=0.753, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 442/5000 [01:33<15:35,  4.87it/s, critic_loss=1.01, policy_loss=-4.61] 

cri
pol


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.848, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.993, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.784, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.846, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.35, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.75, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.952, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.958, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.12, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.31, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=1.21, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:33<15:26,  4.92it/s, critic_loss=0.847, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=0.933, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=0.889, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=0.997, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 443/5000 [01:34<15:26,  4.92it/s, critic_loss=0.98, policy_loss=-4.72] 

cri
pol


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.06, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.995, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.09, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.94, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.52, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.96, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.39, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.893, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.928, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.913, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.903, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.989, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.863, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 444/5000 [01:34<15:07,  5.02it/s, critic_loss=0.986, policy_loss=-4.65]

cri
pol


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.14, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.956, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.867, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.25, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.954, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.05, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.699, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.03, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.29, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=1.25, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.993, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.901, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.925, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.899, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.993, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.887, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 445/5000 [01:34<15:19,  4.96it/s, critic_loss=0.857, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.872, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.03, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.817, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.26, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.938, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.911, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.775, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.84, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.709, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.952, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.28, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.42, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.04, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.28, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.913, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=1.07, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.921, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 446/5000 [01:34<15:27,  4.91it/s, critic_loss=0.969, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.863, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.03, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.866, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.07, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.888, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.713, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.15, policy_loss=-4.63] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1, policy_loss=-4.62]   

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.03, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.856, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=1.32, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.98, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.916, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.83, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.882, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.796, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.946, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 447/5000 [01:34<15:25,  4.92it/s, critic_loss=0.858, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.945, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.35, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.819, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.36, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.82, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.68, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.851, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.868, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.62, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.931, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.01, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=0.938, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:34<15:08,  5.01it/s, critic_loss=1.06, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:35<15:08,  5.01it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:35<15:08,  5.01it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:35<15:08,  5.01it/s, critic_loss=0.767, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 448/5000 [01:35<15:08,  5.01it/s, critic_loss=0.796, policy_loss=-4.65]

cri
pol


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=1.77, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.753, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.746, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.809, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.837, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.847, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.958, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.893, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.668, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.743, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.678, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.863, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=1.09, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.948, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.9, policy_loss=-4.64]  

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=1.3, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=1.21, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=1.09, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.852, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 449/5000 [01:35<14:36,  5.20it/s, critic_loss=0.771, policy_loss=-4.77]

cri
pol


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.89, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.89, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.898, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.707, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.998, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.932, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.819, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.889, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.04, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.82, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.873, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.92, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.918, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.03, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.95, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.14, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.2, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=0.847, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.1, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▉         | 450/5000 [01:35<14:17,  5.31it/s, critic_loss=1.06, policy_loss=-4.75]

cri
pol


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.2, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.724, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.789, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.878, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.718, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.849, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.02, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.951, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.72, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.875, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.699, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.21, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.785, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.945, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.768, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.09, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.8, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=0.88, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 451/5000 [01:35<14:05,  5.38it/s, critic_loss=1.21, policy_loss=-4.75]

cri
pol


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.897, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.756, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.834, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.924, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.888, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.638, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.897, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.849, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.857, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.92, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.791, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.845, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.851, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.641, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.867, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=1.04, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.998, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 452/5000 [01:35<14:19,  5.29it/s, critic_loss=0.909, policy_loss=-4.73]

cri
pol


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.847, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.917, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.842, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.712, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.805, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.767, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.762, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.885, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.813, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.967, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.771, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.891, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.802, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=1.33, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.917, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.668, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:35<14:43,  5.14it/s, critic_loss=0.808, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 453/5000 [01:36<14:43,  5.14it/s, critic_loss=1.09, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.925, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.998, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.871, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.776, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.931, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.716, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.896, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.808, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.994, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.681, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.807, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=1.08, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.795, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=1, policy_loss=-4.71]    

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=1.07, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.735, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.89, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 454/5000 [01:36<15:20,  4.94it/s, critic_loss=0.714, policy_loss=-4.66]

cri
pol


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.874, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.836, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=1.25, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.874, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.759, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.941, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=1.07, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.709, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=1.08, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.728, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.661, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.739, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.966, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.751, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.724, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.621, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.871, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.827, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=1.1, policy_loss=-4.69]  

cri


Epoch 1/10:   9%|▉         | 455/5000 [01:36<15:33,  4.87it/s, critic_loss=0.815, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.74, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.875, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.886, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.804, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.735, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.77, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.962, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=1.21, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.745, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.804, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.814, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.952, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.868, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.901, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.97, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.942, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=1.27, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 456/5000 [01:36<15:52,  4.77it/s, critic_loss=0.682, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=2.72, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.947, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.963, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.951, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=1.02, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.976, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.787, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=1.02, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.785, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.73, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.921, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.639, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.753, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.937, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.879, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.882, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=1.18, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 457/5000 [01:36<15:36,  4.85it/s, critic_loss=0.984, policy_loss=-4.71]

cri
pol


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.926, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.18, policy_loss=-4.74] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.23, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.999, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.14, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.32, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.749, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.925, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.26, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.33, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.01, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.39, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.92, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.56, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.841, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:36<14:59,  5.05it/s, critic_loss=0.945, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:37<14:59,  5.05it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:37<14:59,  5.05it/s, critic_loss=1.11, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 458/5000 [01:37<14:59,  5.05it/s, critic_loss=1.1, policy_loss=-4.7]  

cri
pol


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=0.896, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=0.926, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.25, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.2, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.4, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=0.804, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.56, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.06, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.34, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.22, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=0.851, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.2, policy_loss=-4.7]   

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.49, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.13, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 459/5000 [01:37<15:02,  5.03it/s, critic_loss=1.13, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=0.749, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.5, policy_loss=-4.64]  

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.18, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.1, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.64, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.06, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.22, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.12, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.29, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.11, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.21, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.34, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 460/5000 [01:37<15:07,  5.00it/s, critic_loss=1.25, policy_loss=-4.7] 

cri
pol


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.02, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.17, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.24, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.32, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.42, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.23, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.38, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.37, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.29, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.6, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.74, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=0.978, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 461/5000 [01:37<14:58,  5.05it/s, critic_loss=1.05, policy_loss=-4.67] 

cri
pol


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.23, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.28, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=0.984, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=0.977, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.19, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.41, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=0.993, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.05, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.71, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.34, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.14, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.34, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.43, policy_loss=-4.78]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.18, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.38, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.37, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 462/5000 [01:37<15:39,  4.83it/s, critic_loss=1.37, policy_loss=-4.59]

cri
pol


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.1, policy_loss=-4.62] 

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.41, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.38, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=0.98, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.24, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.18, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.78, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.18, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.17, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.27, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.27, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:37<14:55,  5.07it/s, critic_loss=1.32, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=0.954, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=1.15, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=1.38, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=1.21, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=0.992, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=1.41, policy_loss=-4.64] 

cri


Epoch 1/10:   9%|▉         | 463/5000 [01:38<14:55,  5.07it/s, critic_loss=1.08, policy_loss=-4.61]

cri
pol


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.51, policy_loss=-4.6] 

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.3, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=0.915, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.08, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.05, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=0.873, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.1, policy_loss=-4.62]  

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.47, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.48, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.25, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=0.914, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.16, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.34, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.1, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.2, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.2, policy_loss=-4.62]

cri


Epoch 1/10:   9%|▉         | 464/5000 [01:38<15:35,  4.85it/s, critic_loss=1.07, policy_loss=-4.61]

cri
pol


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.15, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.937, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.24, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.95, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.994, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.965, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.986, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.13, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.05, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.09, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.08, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.92, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.896, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.42, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.04, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=0.847, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.11, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.28, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 465/5000 [01:38<15:12,  4.97it/s, critic_loss=1.01, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.998, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.25, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.944, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.966, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.963, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.39, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.946, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.42, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.926, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.881, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.04, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=0.799, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1.19, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▉         | 466/5000 [01:38<15:03,  5.02it/s, critic_loss=1, policy_loss=-4.63]   

cri
pol


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.26, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.866, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.982, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.897, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.944, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.5, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.991, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.939, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.89, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.897, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.962, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.44, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=0.91, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 467/5000 [01:38<15:46,  4.79it/s, critic_loss=1.13, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.858, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=1.05, policy_loss=-4.76] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.903, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.983, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.974, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.963, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.801, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:38<16:04,  4.70it/s, critic_loss=0.953, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.21, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.32, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=0.774, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.37, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=0.871, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.11, policy_loss=-4.65] 

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.25, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 468/5000 [01:39<16:04,  4.70it/s, critic_loss=0.929, policy_loss=-4.68]

cri
pol


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.05, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.32, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.12, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.873, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.03, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.988, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.928, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.22, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.64, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.18, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.41, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.965, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.08, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=1.44, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.864, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.97, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 469/5000 [01:39<15:52,  4.76it/s, critic_loss=0.908, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.813, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.862, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.4, policy_loss=-4.7]   

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.912, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.939, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.14, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.924, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.35, policy_loss=-4.67] 

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.27, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.977, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.845, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.27, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.42, policy_loss=-4.78]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.14, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=1.03, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.915, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 470/5000 [01:39<15:34,  4.85it/s, critic_loss=0.98, policy_loss=-4.71] 

cri
pol


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.905, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.07, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.919, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.846, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.837, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.758, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.08, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.37, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.16, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.798, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.05, policy_loss=-4.66] 

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.99, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.928, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.998, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.04, policy_loss=-4.77]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.936, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.17, policy_loss=-4.78] 

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=1.02, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 471/5000 [01:39<15:31,  4.86it/s, critic_loss=0.885, policy_loss=-4.69]

cri
pol


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.997, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.26, policy_loss=-4.63] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.858, policy_loss=-4.6]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.819, policy_loss=-4.6]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.89, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.913, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.955, policy_loss=-4.76]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.13, policy_loss=-4.77] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.09, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.735, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.787, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.11, policy_loss=-4.62] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.911, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.08, policy_loss=-4.68] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.877, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.01, policy_loss=-4.73] 

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.834, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.924, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=0.845, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 472/5000 [01:39<15:28,  4.88it/s, critic_loss=1.06, policy_loss=-4.65] 

cri
pol


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.788, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.953, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=1.03, policy_loss=-4.71] 

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.982, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=1.05, policy_loss=-4.75] 

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.734, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.795, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=1.37, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:39<15:13,  4.96it/s, critic_loss=0.946, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.794, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.947, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.938, policy_loss=-4.61]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.819, policy_loss=-4.63]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=1.19, policy_loss=-4.64] 

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.851, policy_loss=-4.65]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.791, policy_loss=-4.64]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.848, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=1.1, policy_loss=-4.71]  

cri


Epoch 1/10:   9%|▉         | 473/5000 [01:40<15:13,  4.96it/s, critic_loss=0.925, policy_loss=-4.72]

cri
pol


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.919, policy_loss=-4.75]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.911, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.942, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.922, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.825, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.756, policy_loss=-4.7]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.855, policy_loss=-4.74]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.943, policy_loss=-4.67]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.814, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.991, policy_loss=-4.72]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=1.11, policy_loss=-4.72] 

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=1.01, policy_loss=-4.7] 

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.921, policy_loss=-4.68]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.859, policy_loss=-4.66]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=1.14, policy_loss=-4.7]  

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.782, policy_loss=-4.71]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.826, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=0.904, policy_loss=-4.73]

cri


Epoch 1/10:   9%|▉         | 474/5000 [01:40<15:08,  4.98it/s, critic_loss=1.38, policy_loss=-4.73] 

cri
pol


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.943, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.716, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=1.15, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.846, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.944, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.742, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.784, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.81, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.709, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=1.06, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=1.27, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.803, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=1.09, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.85, policy_loss=-4.76]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.896, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.712, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.932, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.764, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 475/5000 [01:40<15:38,  4.82it/s, critic_loss=0.94, policy_loss=-4.63] 

cri
pol


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.82, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.714, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.828, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.891, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.866, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.94, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.776, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.905, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.707, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.964, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.89, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=1.39, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.839, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.693, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=1.21, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=1.13, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.902, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.796, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 476/5000 [01:40<16:06,  4.68it/s, critic_loss=0.792, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.759, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.821, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.894, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.778, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=1.04, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.774, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.89, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=1.37, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.887, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.891, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.8, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.679, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.924, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.919, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.939, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.846, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.781, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 477/5000 [01:40<15:13,  4.95it/s, critic_loss=0.744, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.869, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.907, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=1.3, policy_loss=-4.72]  

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.936, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.971, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.797, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.842, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.721, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.712, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:40<14:41,  5.13it/s, critic_loss=0.833, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.879, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.807, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.77, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.691, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.668, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=0.713, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 478/5000 [01:41<14:41,  5.13it/s, critic_loss=1.07, policy_loss=-4.65] 

cri
pol


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.702, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.17, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.831, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.991, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.968, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.846, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.881, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.84, policy_loss=-4.61] 

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.893, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.766, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.08, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.06, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.819, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.862, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.955, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.686, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=0.835, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 479/5000 [01:41<14:18,  5.27it/s, critic_loss=1.01, policy_loss=-4.62] 

cri
pol


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.879, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.945, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.835, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.773, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.04, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.685, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.87, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.14, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.859, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1, policy_loss=-4.7]     

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.863, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.979, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.21, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.698, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.873, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.895, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=0.984, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 480/5000 [01:41<14:27,  5.21it/s, critic_loss=1.16, policy_loss=-4.65] 

cri
pol


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.717, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.912, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.886, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.23, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.07, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.97, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.76, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.04, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.903, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.853, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.983, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.02, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.86, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.02, policy_loss=-4.79]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.814, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.883, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=1.18, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 481/5000 [01:41<14:45,  5.10it/s, critic_loss=0.796, policy_loss=-4.61]

cri
pol


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.12, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.18, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.13, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.878, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.95, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.908, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.902, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.833, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.15, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.857, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.953, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.79, policy_loss=-4.77] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.12, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.857, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=0.991, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.25, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.06, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 482/5000 [01:41<14:43,  5.11it/s, critic_loss=1.02, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.03, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=0.977, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=0.792, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.15, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=0.877, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=0.906, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=0.914, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.07, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.13, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.45, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.11, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:41<14:36,  5.16it/s, critic_loss=1.26, policy_loss=-4.81]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=1.11, policy_loss=-4.79]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=0.874, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=1.08, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=0.861, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=0.984, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=1.12, policy_loss=-4.59] 

cri


Epoch 1/10:  10%|▉         | 483/5000 [01:42<14:36,  5.16it/s, critic_loss=1.25, policy_loss=-4.64]

cri
pol


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.3, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.08, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.933, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.17, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.07, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.95, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.979, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.884, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.11, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.06, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.32, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.22, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.852, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.976, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.92, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.77, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=0.916, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.26, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 484/5000 [01:42<15:01,  5.01it/s, critic_loss=1.25, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.08, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.879, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.14, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.16, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.3, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.986, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.33, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.05, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.01, policy_loss=-4.79]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.21, policy_loss=-4.79]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.15, policy_loss=-4.76]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.958, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.04, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.916, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.768, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.11, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.919, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.02, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 485/5000 [01:42<15:30,  4.85it/s, critic_loss=0.848, policy_loss=-4.73]

cri
pol


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.78, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.1, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.19, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.14, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.862, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.913, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.718, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.953, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.1, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.931, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.05, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.854, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=1.07, policy_loss=-4.78]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.938, policy_loss=-4.78]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.878, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 486/5000 [01:42<15:35,  4.82it/s, critic_loss=0.899, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.934, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.978, policy_loss=-4.61]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.998, policy_loss=-4.61]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.47, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.83, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.03, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.824, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.12, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.923, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.803, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.975, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.08, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.18, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.19, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=0.985, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.1, policy_loss=-4.72]  

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.32, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 487/5000 [01:42<15:27,  4.87it/s, critic_loss=1.32, policy_loss=-4.73]

cri
pol


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=0.947, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=1.1, policy_loss=-4.66]  

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=0.791, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=1.11, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=1.11, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:42<15:20,  4.90it/s, critic_loss=1.24, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=1.38, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.876, policy_loss=-4.77]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.946, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.903, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=1.2, policy_loss=-4.66]  

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.747, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.796, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.929, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.949, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.846, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=1.16, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=1.16, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 488/5000 [01:43<15:20,  4.90it/s, critic_loss=0.893, policy_loss=-4.74]

cri
pol


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.2, policy_loss=-4.71]  

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.05, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.854, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.927, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.926, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.16, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.943, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.998, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.09, policy_loss=-4.77] 

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.06, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.91, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.921, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.02, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.931, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.736, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.965, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.06, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=1.1, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 489/5000 [01:43<15:38,  4.81it/s, critic_loss=0.791, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.844, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.732, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.948, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=1.06, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.82, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.922, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.802, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.775, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.74, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.991, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.972, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.871, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.924, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.95, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.979, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.796, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.927, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=1.25, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 490/5000 [01:43<15:32,  4.84it/s, critic_loss=0.822, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.899, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.946, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.952, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=1.28, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.811, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.94, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=1.72, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.747, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.901, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.742, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.912, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=1.03, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.926, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.956, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.752, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.99, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=1.07, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.971, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 491/5000 [01:43<16:16,  4.62it/s, critic_loss=0.744, policy_loss=-4.65]

cri
pol


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.02, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.02, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.44, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.02, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.941, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.08, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.869, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.26, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.23, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.792, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.12, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.86, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.31, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.966, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.881, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.05, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=1.44, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.997, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 492/5000 [01:43<15:45,  4.77it/s, critic_loss=0.934, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 493/5000 [01:43<15:26,  4.86it/s, critic_loss=1.24, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:43<15:26,  4.86it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:43<15:26,  4.86it/s, critic_loss=0.769, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.944, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.979, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=1.56, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.931, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=1.12, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.958, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.815, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=1.49, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.912, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.958, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.745, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.871, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.864, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.906, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=1.06, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 493/5000 [01:44<15:26,  4.86it/s, critic_loss=0.776, policy_loss=-4.7]

cri
pol


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.799, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.17, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.958, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.909, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.852, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.2, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.09, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.98, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.816, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.617, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.791, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=0.946, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.17, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.24, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 494/5000 [01:44<15:28,  4.85it/s, critic_loss=1.19, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.35, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.16, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.728, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.07, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.762, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.02, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.739, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.838, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.825, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.04, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.882, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.963, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.33, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.959, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.928, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1.2, policy_loss=-4.67]  

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.987, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.94, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=0.761, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 495/5000 [01:44<15:40,  4.79it/s, critic_loss=1, policy_loss=-4.71]   

cri
pol


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.2, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.82, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.04, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.08, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.874, policy_loss=-4.61]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.823, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.06, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.02, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.04, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.9, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1, policy_loss=-4.77]  

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.837, policy_loss=-4.74]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.965, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.845, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.02, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.26, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.832, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.999, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=1.35, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|▉         | 496/5000 [01:44<15:29,  4.84it/s, critic_loss=0.885, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.35, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.763, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.02, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.831, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.17, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.812, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.893, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.961, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.998, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.79, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.721, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.924, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.02, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.37, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.12, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.867, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.727, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=0.86, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|▉         | 497/5000 [01:44<15:25,  4.87it/s, critic_loss=1.04, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|▉         | 498/5000 [01:44<15:08,  4.96it/s, critic_loss=0.736, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.07, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.902, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.731, policy_loss=-4.67]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.819, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.12, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.779, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.711, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1, policy_loss=-4.69]    

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.757, policy_loss=-4.64]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.628, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.878, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.959, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.16, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=1.03, policy_loss=-4.75]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.884, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.876, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 498/5000 [01:45<15:08,  4.96it/s, critic_loss=0.937, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.968, policy_loss=-4.62]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.969, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.789, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.08, policy_loss=-4.71]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.801, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.716, policy_loss=-4.72]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.668, policy_loss=-4.69]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.681, policy_loss=-4.63]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.963, policy_loss=-4.65]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.16, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.21, policy_loss=-4.68]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.88, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.856, policy_loss=-4.73]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.03, policy_loss=-4.76] 

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.746, policy_loss=-4.7]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=0.976, policy_loss=-4.66]

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1.41, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|▉         | 499/5000 [01:45<14:52,  5.05it/s, critic_loss=1, policy_loss=-4.63]   

cri
pol


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.992, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.924, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.04, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.04, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.794, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.897, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.91, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.662, policy_loss=-4.62]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.779, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.734, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.794, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.802, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.1, policy_loss=-4.75]  

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.978, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.06, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=1.11, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.982, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.858, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 500/5000 [01:45<15:12,  4.93it/s, critic_loss=0.848, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.22, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.865, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.12, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.16, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.793, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.908, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.45, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.782, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.83, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.899, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.799, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.947, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.925, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=1.1, policy_loss=-4.68]  

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.822, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.874, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.9, policy_loss=-4.69]  

cri


Epoch 1/10:  10%|█         | 501/5000 [01:45<14:52,  5.04it/s, critic_loss=0.815, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.774, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.882, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=2.68, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.919, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.3, policy_loss=-4.7]   

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.738, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.01, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.93, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.866, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.871, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.613, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.696, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.926, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=0.587, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.27, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.1, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.48, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.19, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.04, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 502/5000 [01:45<14:32,  5.15it/s, critic_loss=1.14, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=0.962, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=1.16, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=0.858, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=0.849, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=1.02, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 503/5000 [01:45<14:21,  5.22it/s, critic_loss=1.04, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.01, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.26, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.16, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.758, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.917, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.04, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.868, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.73, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.894, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.819, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=0.901, policy_loss=-4.61]

cri


Epoch 1/10:  10%|█         | 503/5000 [01:46<14:21,  5.22it/s, critic_loss=1.17, policy_loss=-4.64] 

cri
pol


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.13, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.14, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.903, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.92, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.794, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.16, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.933, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.32, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.847, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.981, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.2, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.873, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.07, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.901, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.17, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=1.21, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 504/5000 [01:46<14:07,  5.31it/s, critic_loss=0.818, policy_loss=-4.73]

cri
pol


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.767, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.29, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.983, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.27, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.935, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.846, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.985, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.05, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.889, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.04, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.925, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.905, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.08, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=1.46, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.918, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.88, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.983, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.748, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.949, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 505/5000 [01:46<13:55,  5.38it/s, critic_loss=0.882, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.774, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.832, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.88, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.09, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.899, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.683, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.76, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.3, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.965, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.835, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.5, policy_loss=-4.71]  

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.839, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1, policy_loss=-4.7]     

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.2, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.739, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.997, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.722, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=1.02, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 506/5000 [01:46<13:59,  5.35it/s, critic_loss=0.812, policy_loss=-4.72]

cri
pol


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.11, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.02, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.928, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.1, policy_loss=-4.67]  

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.953, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.824, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.15, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.04, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.979, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.913, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.2, policy_loss=-4.7]   

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.17, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.884, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.18, policy_loss=-4.62] 

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.837, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.951, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.768, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.821, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=1.11, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 507/5000 [01:46<14:03,  5.32it/s, critic_loss=0.867, policy_loss=-4.73]

cri
pol


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.886, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.955, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.845, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.979, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.976, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.783, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.939, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=1.27, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.936, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.962, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=1.58, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.988, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 508/5000 [01:46<14:05,  5.31it/s, critic_loss=0.94, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=0.986, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=1.06, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=0.975, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=0.822, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=0.794, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 508/5000 [01:47<14:05,  5.31it/s, critic_loss=1.08, policy_loss=-4.73] 

cri
pol


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.895, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.03, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.03, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.795, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.785, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.901, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.732, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.09, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.16, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.854, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1, policy_loss=-4.71]    

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.17, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.03, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.813, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.948, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.85, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=1.29, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 509/5000 [01:47<14:17,  5.24it/s, critic_loss=0.966, policy_loss=-4.72]

cri
pol


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.39, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.845, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.825, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.05, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.945, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.39, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.09, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.1, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.971, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.06, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.13, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.922, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.04, policy_loss=-4.76] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.826, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.14, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.1, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.33, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=1.22, policy_loss=-4.62]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.937, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 510/5000 [01:47<14:38,  5.11it/s, critic_loss=0.825, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.872, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.935, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.02, policy_loss=-4.76] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.32, policy_loss=-4.76]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.891, policy_loss=-4.76]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.969, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.896, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.11, policy_loss=-4.62] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.995, policy_loss=-4.61]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.929, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.36, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.919, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.929, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.1, policy_loss=-4.75]  

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1, policy_loss=-4.74]  

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.03, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.943, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=0.9, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 511/5000 [01:47<14:45,  5.07it/s, critic_loss=1.34, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.08, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.862, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.04, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.851, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.13, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.984, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.08, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.862, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.931, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.01, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.99, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.13, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.934, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.39, policy_loss=-4.62] 

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.16, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.02, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=0.907, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.26, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.12, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 512/5000 [01:47<15:06,  4.95it/s, critic_loss=1.08, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=1.17, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=0.925, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=1.24, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=1.07, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=0.94, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:47<16:05,  4.65it/s, critic_loss=0.957, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.25, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.01, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.02, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.14, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.43, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.15, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.35, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.03, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.36, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.12, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.06, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=1.36, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=0.967, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 513/5000 [01:48<16:05,  4.65it/s, critic_loss=0.978, policy_loss=-4.67]

cri
pol


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.894, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.09, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.09, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.41, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.805, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.03, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.38, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.12, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.06, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.981, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.905, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.39, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.959, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.22, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.05, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.07, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.2, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=0.865, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 514/5000 [01:48<15:50,  4.72it/s, critic_loss=1.15, policy_loss=-4.65] 

cri
pol


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.851, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.28, policy_loss=-4.63] 

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.15, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.953, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.18, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.25, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.02, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.08, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.921, policy_loss=-4.61]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.948, policy_loss=-4.59]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.25, policy_loss=-4.58] 

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.07, policy_loss=-4.61]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.4, policy_loss=-4.64] 

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.08, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.972, policy_loss=-4.75]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.21, policy_loss=-4.74] 

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.829, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=0.862, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.37, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 515/5000 [01:48<15:22,  4.86it/s, critic_loss=1.06, policy_loss=-4.61]

cri
pol


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.06, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.935, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.01, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.01, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.12, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.05, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.01, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.02, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.995, policy_loss=-4.6]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.846, policy_loss=-4.61]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.18, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.946, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.09, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.04, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.876, policy_loss=-4.76]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.89, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.07, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=0.945, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.01, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 516/5000 [01:48<15:14,  4.90it/s, critic_loss=1.17, policy_loss=-4.64]

cri
pol


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.11, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.968, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.903, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.887, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.57, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.788, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.964, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.996, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.17, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.24, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.794, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.09, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.03, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.903, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.943, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=0.846, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.23, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 517/5000 [01:48<15:45,  4.74it/s, critic_loss=1.26, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|█         | 518/5000 [01:48<15:36,  4.79it/s, critic_loss=1.24, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:48<15:36,  4.79it/s, critic_loss=0.893, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:48<15:36,  4.79it/s, critic_loss=1.02, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.889, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.835, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.987, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.833, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=1.02, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.98, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.867, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=1.28, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.742, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.986, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.944, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.899, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.89, policy_loss=-4.67] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.848, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.927, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 518/5000 [01:49<15:36,  4.79it/s, critic_loss=0.917, policy_loss=-4.72]

cri
pol


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.843, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.762, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=1.01, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.845, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.897, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=1.03, policy_loss=-4.65] 

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.91, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.67, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=1.04, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.93, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.935, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.853, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.978, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.857, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.988, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=1.22, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.761, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.755, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=0.913, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 519/5000 [01:49<16:12,  4.61it/s, critic_loss=1.01, policy_loss=-4.66]

cri
pol


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=1.19, policy_loss=-4.63]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.777, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.87, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.778, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.854, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.658, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.897, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.868, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=1.05, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.793, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.889, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.849, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.845, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=1.08, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.728, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=1.13, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.872, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.769, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.926, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 520/5000 [01:49<16:44,  4.46it/s, critic_loss=0.948, policy_loss=-4.7]

cri
pol


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.75, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.776, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=1.02, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=1.4, policy_loss=-4.75] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.902, policy_loss=-4.76]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.796, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.948, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.69, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.948, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.749, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.655, policy_loss=-4.62]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.931, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.757, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.749, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.762, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.715, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.763, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.754, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.817, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 521/5000 [01:49<16:14,  4.60it/s, critic_loss=0.852, policy_loss=-4.69]

cri
pol


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.748, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=1.07, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.78, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.969, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.74, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=1.19, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.806, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.732, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=1.15, policy_loss=-4.68] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.734, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.729, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=1.11, policy_loss=-4.72] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.805, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.714, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.928, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.815, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:49<15:46,  4.73it/s, critic_loss=0.884, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:50<15:46,  4.73it/s, critic_loss=0.824, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 522/5000 [01:50<15:46,  4.73it/s, critic_loss=0.986, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 522/5000 [01:50<15:46,  4.73it/s, critic_loss=0.891, policy_loss=-4.71]

cri
pol


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.739, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=1.17, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.978, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.959, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=1, policy_loss=-4.68]   

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.874, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=1.64, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.66, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.666, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=1, policy_loss=-4.72]    

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.822, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.973, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.644, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.699, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=1.07, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.852, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.713, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.921, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.75, policy_loss=-4.7]  

cri


Epoch 1/10:  10%|█         | 523/5000 [01:50<15:13,  4.90it/s, critic_loss=0.611, policy_loss=-4.68]

cri
pol


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.922, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.634, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.818, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.773, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.871, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.838, policy_loss=-4.7]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.832, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.847, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.729, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.909, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.735, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.585, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.851, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.854, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.805, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.806, policy_loss=-4.67]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.762, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.928, policy_loss=-4.73]

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.68, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 524/5000 [01:50<14:56,  5.00it/s, critic_loss=0.848, policy_loss=-4.73]

cri
pol


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.874, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.805, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.749, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.706, policy_loss=-4.65]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=1.25, policy_loss=-4.69] 

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.822, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.741, policy_loss=-4.74]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.703, policy_loss=-4.71]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.69, policy_loss=-4.73] 

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.68, policy_loss=-4.7] 

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.883, policy_loss=-4.68]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.775, policy_loss=-4.66]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.578, policy_loss=-4.64]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=1.15, policy_loss=-4.66] 

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.869, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.884, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.762, policy_loss=-4.69]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.863, policy_loss=-4.72]

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=1.14, policy_loss=-4.71] 

cri


Epoch 1/10:  10%|█         | 525/5000 [01:50<14:38,  5.10it/s, critic_loss=0.809, policy_loss=-4.71]

cri
pol


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.955, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.914, policy_loss=-4.69]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.774, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.687, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.867, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.766, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.73, policy_loss=-4.67] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.807, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.805, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.739, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.844, policy_loss=-4.69]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.769, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=1.01, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.676, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.569, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.769, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.81, policy_loss=-4.68] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.781, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=1.14, policy_loss=-4.69] 

cri


Epoch 1/10:  11%|█         | 526/5000 [01:50<14:24,  5.18it/s, critic_loss=0.795, policy_loss=-4.66]

cri
pol


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=1.18, policy_loss=-4.68] 

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.854, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.767, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.963, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=1, policy_loss=-4.73]    

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.864, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.725, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.769, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.661, policy_loss=-4.62]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.853, policy_loss=-4.62]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.848, policy_loss=-4.63]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.733, policy_loss=-4.63]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=1.15, policy_loss=-4.68] 

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.809, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=1.14, policy_loss=-4.75] 

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.634, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.794, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.934, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.592, policy_loss=-4.64]

cri


Epoch 1/10:  11%|█         | 527/5000 [01:50<14:10,  5.26it/s, critic_loss=0.797, policy_loss=-4.63]

cri
pol


Epoch 1/10:  11%|█         | 528/5000 [01:50<14:16,  5.22it/s, critic_loss=1.16, policy_loss=-4.63] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:50<14:16,  5.22it/s, critic_loss=0.954, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:50<14:16,  5.22it/s, critic_loss=0.81, policy_loss=-4.69] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.701, policy_loss=-4.69]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.783, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.766, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.733, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=1.03, policy_loss=-4.68] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.814, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.771, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.694, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.808, policy_loss=-4.74]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.973, policy_loss=-4.74]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.714, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.853, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.872, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.99, policy_loss=-4.69] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.865, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.84, policy_loss=-4.63] 

cri


Epoch 1/10:  11%|█         | 528/5000 [01:51<14:16,  5.22it/s, critic_loss=0.766, policy_loss=-4.67]

cri
pol


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.883, policy_loss=-4.68]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.775, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.846, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.977, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.667, policy_loss=-4.76]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.715, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.801, policy_loss=-4.72]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=1.3, policy_loss=-4.73]  

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.909, policy_loss=-4.68]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=1.13, policy_loss=-4.69] 

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.74, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.594, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.693, policy_loss=-4.63]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.873, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.764, policy_loss=-4.66]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=1.01, policy_loss=-4.7]  

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.668, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.754, policy_loss=-4.73]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.859, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 529/5000 [01:51<14:15,  5.22it/s, critic_loss=0.793, policy_loss=-4.69]

cri
pol


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.802, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.987, policy_loss=-4.7]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.82, policy_loss=-4.71]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.806, policy_loss=-4.69]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.977, policy_loss=-4.7] 

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.706, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.672, policy_loss=-4.68]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.805, policy_loss=-4.68]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.811, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.96, policy_loss=-4.67] 

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.885, policy_loss=-4.67]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.723, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<14:03,  5.30it/s, critic_loss=0.922, policy_loss=-4.65]

cri


Epoch 1/10:  11%|█         | 530/5000 [01:51<15:40,  4.76it/s, critic_loss=0.922, policy_loss=-4.65]


KeyboardInterrupt: 