In [1]:
import scipy.signal
import sys
import torch
import torch.nn as nn
import numpy as np

In [2]:
from typing import Dict, List, Optional, Tuple
import gym
from PIL import Image
# from pyvirtualdisplay import Display
# Display().start()
from datetime import datetime
from tqdm import tqdm

In [3]:
import math
import random
from copy import deepcopy
import torch
from torch.optim import Adam
from torch.optim import RMSprop
import gym
import time
from collections import namedtuple, deque
import neptune.new as neptune

In [4]:
import robosuite as suite
from robosuite.controllers import load_controller_config
from robosuite.controllers.controller_factory import reset_controllers
from robosuite.utils import observables
from robosuite.utils.input_utils import *
from robosuite.robots import Bimanual
import imageio
import numpy as np
import robosuite.utils.macros as macros
macros.IMAGE_CONVENTION = "opencv"

In [5]:
import os

# Start the Neptune run used for all logging in this notebook.
#
# The API token is a secret and must not live in the notebook source: it is
# read from the NEPTUNE_API_TOKEN environment variable instead. Any token that
# was previously committed in this cell should be revoked in the Neptune UI.
# When the variable is unset, ``None`` is passed and Neptune applies its own
# credential lookup / error reporting.
nep_log = neptune.init(
    project="xhnfirst/DDPG-robosuite",
    api_token=os.environ.get("NEPTUNE_API_TOKEN"),
)

https://app.neptune.ai/xhnfirst/DDPG-robosuite/e/DDPGROB-150
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [6]:
# Arguments shared by all three environments built in this cell: the task
# name, the robot model and a joint-velocity controller configuration.
controller_name = "JOINT_VELOCITY"
options = dict(
    env_name='EElab_test2',
    robots="UR5e",
    controller_configs=suite.load_controller_config(default_controller=controller_name),
)

# Environment stepped by the training loop (no camera observations).
env = suite.make(
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer='mujoco',
    **options,
)

# Separate instance with the same settings, used by test_agent() so that
# evaluation rollouts do not disturb the training episode in progress.
test_env = suite.make(
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer='mujoco',
    **options,
)

# Instance with camera observations enabled; video_agent() reads the
# "Labviewer_image" entry of its observations to write video frames.
video_env = suite.make(
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=True,
    use_object_obs=True,
    camera_names='Labviewer',
    camera_heights=512,
    camera_widths=512,
    gripper_types=None,
    # control_freq=200,
    renderer='mujoco',
    **options,
)

frame = []

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device = ', device)

device =  cuda


In [7]:
def mlp(sizes, activation, output_activation=nn.Identity):
    """Build a fully connected network as an ``nn.Sequential``.

    Args:
        sizes: Sequence of layer widths; consecutive pairs define the
            ``nn.Linear`` layers.
        activation: Module class instantiated after every linear layer except
            the last one.
        output_activation: Module class instantiated after the last linear
            layer (defaults to ``nn.Identity``).

    Returns:
        ``nn.Sequential`` alternating linear layers and activation modules.
        Empty when ``sizes`` has fewer than two entries.
    """
    final_idx = len(sizes) - 2
    modules = []
    for idx, (n_in, n_out) in enumerate(zip(sizes[:-1], sizes[1:])):
        nonlinearity = output_activation if idx == final_idx else activation
        modules.append(nn.Linear(n_in, n_out))
        modules.append(nonlinearity())
    return nn.Sequential(*modules)


class MLPActor(nn.Module):
    """Deterministic policy: an MLP with a tanh output, scaled by ``act_limit``."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
        """Create the policy network.

        Args:
            obs_dim: Width of the observation input.
            act_dim: Width of the action output.
            hidden_sizes: Iterable of hidden-layer widths.
            activation: Module class used between hidden layers.
            act_limit: Factor applied to the tanh output in ``forward``.
        """
        super().__init__()
        layer_sizes = [obs_dim, *hidden_sizes, act_dim]
        self.pi = mlp(layer_sizes, activation, nn.Tanh)
        self.act_limit = act_limit

    def forward(self, obs):
        """Return the network output for ``obs`` multiplied by ``act_limit``."""
        squashed = self.pi(obs)
        return self.act_limit * squashed

class MLPQFunction(nn.Module):
    """Action-value network: an MLP over the concatenated observation and action."""

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        """Create the Q network with input width ``obs_dim + act_dim`` and one output."""
        super().__init__()
        layer_sizes = [obs_dim + act_dim, *hidden_sizes, 1]
        self.q = mlp(layer_sizes, activation)

    def forward(self, obs, act):
        """Return Q-values for ``(obs, act)`` with the trailing size-1 axis removed."""
        joint_input = torch.cat((obs, act), dim=-1)
        # Dropping the last axis keeps the result shape compatible with the
        # reward/done tensors it is combined with in the Q-loss.
        return self.q(joint_input).squeeze(-1)

class MLPActorCritic(nn.Module):
    """Container holding the DDPG policy (``pi``) and Q-function (``q``) networks."""

    def __init__(self, hidden_sizes=(256,256),
                 activation=nn.ReLU, device=torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                 obs_dim=35, act_dim=6, act_limit=1):
        """Build both networks and move them to ``device``.

        Args:
            hidden_sizes: Hidden-layer widths shared by the policy and the
                Q-function.
            activation: Module class used between hidden layers.
            device: Device the two sub-networks are moved to.
            obs_dim: Observation width. Defaults to 35, the value that used
                to be hard-coded here.
            act_dim: Action width. Defaults to 6, as before.
            act_limit: Scale applied to the policy's tanh output. Defaults
                to 1, as before.
        """
        super().__init__()

        # build policy and value functions
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit).to(device)
        self.q = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation).to(device)

    def act(self, obs):
        """Return the policy output for ``obs`` without tracking gradients."""
        with torch.no_grad():
            return self.pi(obs)

In [8]:
# One stored environment step: (obs, act, rew, next_obs, done).
Transition = namedtuple('Transition', ['obs', 'act', 'rew', 'next_obs', 'done'])


class ReplayMemory(object):
    """Bounded FIFO buffer of ``Transition`` records with random sampling."""

    def __init__(self, capacity):
        """Create an empty buffer that keeps at most ``capacity`` transitions."""
        # A deque with ``maxlen`` discards the oldest entry once it is full.
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        record = Transition(*args)
        self.memory.append(record)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        return random.sample(self.memory, k=batch_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.memory)

In [9]:

# Hyper-parameters for the run; the whole mapping is also logged to Neptune.
params = dict(
    # NOTE(review): these three entries are not read by any visible cell
    # (the optimizers are RMSprop with pi_lr/q_lr) - confirm they are
    # intentional before relying on the logged values.
    dropout=0.2,
    learning_rate=0.001,
    optimizer="Adam",
    # Network shape: `l` hidden layers of width `hid`.
    hid=256,
    l=3,
    seed=0,
    # Schedule, all measured in environment steps.
    steps_per_epoch=3000,
    steps_video=30000,
    epochs=1000,
    # Replay buffer capacity.
    replay_size=int(1e8),
    # DDPG settings: discount, target-network averaging factor, learning rates.
    gamma=0.99,
    polyak=0.995,
    pi_lr=1e-4,
    q_lr=1e-4,
    batch_size=1000,
    # Steps of uniform-random actions before the policy is used.
    start_steps=10000,
    # Gradient updates start after `update_after` steps and run in bursts
    # of `update_every` updates every `update_every` steps.
    update_after=5000,
    update_every=100,
    # Scale of the Gaussian noise added to policy actions during training.
    act_noise=0.01,
    num_test_episodes=5,
    max_ep_len=1000,
    max_video_len=1000,
    # A checkpoint is written every `save_model_len` steps.
    save_model_len=10000,
    # obs_dim=47,
    # act_dim=7,
    # act_limit=1,
)

# Keyword arguments forwarded to the actor-critic constructor.
ac_kwargs = {"hidden_sizes": [params["hid"] for _ in range(params["l"])]}

In [10]:
# Record the hyper-parameters with the run.
nep_log["parameters"] = params

# Seed every random number generator the notebook draws from. The stdlib
# `random` module must be seeded too: ReplayMemory.sample() uses
# random.sample(), so without this the minibatches differ between runs even
# with a fixed `seed`.
random.seed(params["seed"])
torch.manual_seed(params["seed"])
np.random.seed(params["seed"])

obs_dim = 35
print('obs_dim = ', obs_dim)
act_dim = 6
print('act_dim = ', act_dim)
# Action limit for clamping: critically, assumes all dimensions share the same bound!
act_limit = 1
print('act_limit = ', act_limit)
# Create actor-critic module and target networks
ac = MLPActorCritic(**ac_kwargs)
ac_targ = deepcopy(ac)

# Freeze target networks with respect to optimizers (only update via polyak averaging)
for p in ac_targ.parameters():
    p.requires_grad = False

# Experience replay buffer filled by the training loop.
memory = ReplayMemory(params["replay_size"])

obs_dim =  35
act_dim =  6
act_limit =  1


In [11]:
# DDPG critic loss
def compute_loss_q(data):
    """Return the mean-squared Bellman error of ``ac.q`` on a batch.

    Args:
        data: ``Transition`` whose fields are sequences of tensors; each
            field is concatenated along dim 0 and cast to float32.

    Returns:
        Scalar tensor: mean of ``(Q(o, a) - backup)**2``.
    """
    obs_b = torch.cat(data.obs).float()
    act_b = torch.cat(data.act).float()
    rew_b = torch.cat(data.rew).float()
    next_obs_b = torch.cat(data.next_obs).float()
    done_b = torch.cat(data.done).float()

    q_pred = ac.q(obs_b, act_b)

    # The backup is computed from the target networks only and is treated
    # as a constant: no gradient flows through it.
    with torch.no_grad():
        next_act = ac_targ.pi(next_obs_b)
        q_next = ac_targ.q(next_obs_b, next_act)
        target = rew_b + params["gamma"] * (1 - done_b) * q_next

    return ((q_pred - target) ** 2).mean()

# DDPG actor loss
def compute_loss_pi(data):
    """Return the policy loss: the negated mean Q-value of the policy's own actions.

    Args:
        data: ``Transition`` batch; only ``data.obs`` is read.
    """
    obs_b = torch.cat(data.obs).float()
    policy_act = ac.pi(obs_b)
    q_of_policy = ac.q(obs_b, policy_act)
    # Minimising the negative mean maximises the critic's value estimate.
    return -q_of_policy.mean()


In [12]:
# Separate optimizers for the policy and the Q-function.
# NOTE(review): params["optimizer"] is logged as "Adam" while RMSprop is what
# is actually constructed here - confirm which one is intended.
pi_optimizer = RMSprop(ac.pi.parameters(), lr=params["pi_lr"])
q_optimizer = RMSprop(ac.q.parameters(), lr=params["q_lr"])


def update(data):
    """Run one DDPG update on a batch: critic step, actor step, target averaging.

    Args:
        data: ``Transition`` batch passed to ``compute_loss_q`` and
            ``compute_loss_pi``.
    """
    # --- Critic step ---
    q_optimizer.zero_grad()
    critic_loss = compute_loss_q(data)
    critic_loss.backward()
    q_optimizer.step()

    # --- Actor step ---
    # The Q-network is only differentiated through here, never updated, so
    # its parameters are frozen to skip computing their gradients.
    for q_param in ac.q.parameters():
        q_param.requires_grad = False

    pi_optimizer.zero_grad()
    actor_loss = compute_loss_pi(data)
    actor_loss.backward()
    pi_optimizer.step()

    # Re-enable gradients so the next critic step can train the Q-network.
    for q_param in ac.q.parameters():
        q_param.requires_grad = True

    # --- Target networks ---
    # In-place polyak averaging: targ <- polyak * targ + (1 - polyak) * online.
    rho = params["polyak"]
    with torch.no_grad():
        for online, target in zip(ac.parameters(), ac_targ.parameters()):
            target.data.mul_(rho)
            target.data.add_((1 - rho) * online.data)


In [13]:



def get_action(o, noise_scale):
    """Return the policy action for ``o`` with Gaussian noise, clipped to the action limit.

    Args:
        o: Observation; converted with ``torch.as_tensor(..., dtype=float32)``.
        noise_scale: Multiplier on the standard-normal noise (0 disables it).
    """
    obs_tensor = torch.as_tensor(o, dtype=torch.float32)
    action = ac.act(obs_tensor)
    # Noise is drawn on the CPU generator, then moved to the model's device.
    action += noise_scale * torch.randn(act_dim).to(device)
    return torch.clip(action, -act_limit, act_limit)

def test_agent(epoch):
    """Evaluate the noise-free policy on ``test_env`` and log the result.

    Runs ``params["num_test_episodes"]`` episodes, each capped at
    ``params["max_ep_len"]`` steps, computes the mean per-step reward of every
    episode, then prints and logs the average of those means.

    Args:
        epoch: Accepted for the caller's convenience; not used in the body.
    """
    mean_total = 0
    episodes_run = 0
    for _ in range(params["num_test_episodes"]):
        raw_obs = test_env.reset()
        done, ep_return, ep_steps = False, 0, 0
        state = list(raw_obs['robot0_proprio-state']) + list(raw_obs['object-state'])
        state = torch.tensor([state], dtype=torch.float32, device=device)
        while not done and ep_steps != params["max_ep_len"]:
            # noise_scale=0 gives the deterministic policy action.
            action_np = get_action(state, 0).cpu().data.numpy()
            raw_obs, reward, done, _ = test_env.step(action_np[0])
            state = list(raw_obs['robot0_proprio-state']) + list(raw_obs['object-state'])
            state = torch.tensor([state], dtype=torch.float32, device=device)
            ep_return += reward
            ep_steps += 1
        mean_total += ep_return / ep_steps
        episodes_run += 1
    print('test_rew_main = ', float(mean_total/episodes_run))
    nep_log["test/reward"].log(mean_total/episodes_run)
    
def video_agent(epoch, video_dir="/home/xhnfly/Cosmic_rays_X/X_Robot/robosuite/robosuite/demos/video"):
    """Record one noise-free rollout on ``video_env`` and attach it to the run.

    Frames are taken from the ``"Labviewer_image"`` observation and written to
    ``<video_dir>/DDPG_UR5_<iso-timestamp>_ep_<epoch>.mp4``; the finished file
    is then assigned to ``nep_log['video']``.

    Args:
        epoch: Epoch number embedded in the file name.
        video_dir: Directory the video is written to. Defaults to the
            location that used to be hard-coded, so existing calls behave
            as before.
    """
    obs, d, test_ep_len = video_env.reset(), False, 0
    o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
    o = torch.tensor([o], dtype=torch.float32, device=device)
    current_time = str(datetime.now().isoformat())
    # Build the path once so the writer and the upload always refer to the
    # same file.
    video_path = "%s/DDPG_UR5_%s_ep_%d.mp4" % (video_dir, current_time, epoch)
    writer = imageio.get_writer(video_path, fps=100)
    try:
        writer.append_data(obs["Labviewer_image"])

        while not(d or (test_ep_len == params["max_video_len"])):
            a_cpu = get_action(o, 0).cpu().data.numpy()
            obs, _, d, _ = video_env.step(a_cpu[0])
            o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
            o = torch.tensor([o], dtype=torch.float32, device=device)
            writer.append_data(obs["Labviewer_image"])
            test_ep_len += 1
    finally:
        # Always release the writer, even when a step or frame write fails,
        # so the file handle/encoder is not leaked.
        writer.close()
    nep_log['video'] = neptune.types.File(video_path)





In [14]:
# obs = {
#     'robot0_joint_pos_cos': None,
#     'robot0_joint_pos_sin': None,
#     'robot0_joint_vel': None,
#     'robot0_eef_pos': None,
#     'robot0_eef_quat': None,
#     'robot0_gripper_qpos': None,
#     'robot0_gripper_qvel': None,
#     'cubeA_pos': None,
#     'cubeA_quat': None,
#     'cubeB_pos': None,
#     'cubeB_quat': None,
#     'gripper_to_cubeA': None,
#     'gripper_to_cubeB': None,
#     'cubeA_to_cubeB': None,
# }

# Start the first training episode and reset its return/length counters.
obs, ep_ret, ep_len = env.reset(), 0, 0

# Flat observation: robot proprioception followed by the object state.
o = list(obs['robot0_proprio-state']) + list(obs['object-state'])

# env.viewer.set_camera(camera_id=0)


# Define neutral value
neutral = np.zeros(7)

# Keep track of done variable to know when to break loop

# Prepare for interaction with environment
total_steps = params["steps_per_epoch"] * params["epochs"]
start_time = time.time()

# Use an explicit float32 dtype, like every other observation tensor in this
# notebook, so the first replay entry has the same dtype as the rest.
o = torch.tensor([o], dtype=torch.float32, device=device)


start_time_rec = datetime.now()
r_true = 0
total_main = 0
ep_rew_main = 0
reward_dict={}

In [15]:
import os

# Main loop: collect experience in env and update/log each epoch
low, high = env.action_spec

# Mean per-step reward of the most recently finished episode. It stays None
# until an episode has ended, so the epoch report cannot hit an undefined name
# when steps_per_epoch is shorter than the first episode.
ep_rew = None

# torch.save() does not create missing directories; make sure the checkpoint
# folder exists up front instead of failing at the first save.
os.makedirs("model_nn", exist_ok=True)

for t in tqdm(range(total_steps)):

    # Until start_steps have elapsed, randomly sample actions
    # from a uniform distribution for better exploration. Afterwards,
    # use the learned policy (with some noise, via act_noise).
    if t > params["start_steps"]:
        a = get_action(o, params["act_noise"])      # Tensor
    else:
        a = torch.tensor([np.random.uniform(low, high)], dtype=torch.float32, device=device)

    a_cpu = a.cpu().data.numpy()
    # Step the env
    obs2, r, d, _ = env.step(a_cpu[0])

    o2 = list(obs2['robot0_proprio-state']) + list(obs2['object-state'])

    ep_len += 1
    total_main += r

    # Ignore the "done" signal if it comes from hitting the time
    # horizon (that is, when it's an artificial terminal signal
    # that isn't based on the agent's state)
    d = False if ep_len==params["max_ep_len"] else d

    o2 = torch.tensor([o2], dtype=torch.float32, device=device)
    r = torch.tensor([r], dtype=torch.float32, device=device)
    d = torch.tensor([d], dtype=torch.float32, device=device)

    # Store experience to replay buffer
    memory.push(o, a, r, o2, d)
    nep_log["train/o"].log(o)
    nep_log["train/a"].log(a)
    nep_log["train/r"].log(r)
    nep_log["train/o2"].log(o2)
    nep_log["train/d"].log(d)

    # Super critical, easy to overlook step: make sure to update
    # most recent observation!
    o = o2
    ep_ret += r

    # End of trajectory handling
    if d or (ep_len == params["max_ep_len"]):
        ep_rew = ep_ret/ep_len
        obs, ep_ret, ep_len = env.reset(), 0, 0
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        # Explicit float32, matching the dtype of every other stored observation.
        o = torch.tensor([o], dtype=torch.float32, device=device)

    # Update handling
    if t >= params["update_after"] and t % params["update_every"] == 0:
        for i in range(params["update_every"]):

            transitions = memory.sample(params["batch_size"])
            # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
            # detailed explanation). This converts batch-array of Transitions
            # to Transition of batch-arrays.
            batch = Transition(*zip(*transitions))
            update(data=batch)

    # End of epoch handling
    if (t+1) % params["steps_per_epoch"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]

        # Report the last finished episode; skipped when none has finished yet.
        if ep_rew is not None:
            ep_rew_main = ep_rew
            nep_log["train/reward"].log(ep_rew_main)
            print('ep_rew_main = ', ep_rew_main.cpu().data.numpy())
        # Test the performance of the deterministic version of the agent.
        test_agent(epoch)

    # Periodically record a video rollout; the timestamps printed before and
    # after show how long the recording took.
    if (t+1) % params["steps_video"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)
        video_agent(epoch)
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)

    # Periodically checkpoint both networks and both optimizers.
    if (t+1) % params["save_model_len"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        torch.save({
                    'model of ac.q': ac.q.state_dict(),
                    'model of ac.pi': ac.pi.state_dict(),
                    'q_optimizer_state_dict': q_optimizer.state_dict(),
                    'pi_optimizer_state_dict': pi_optimizer.state_dict(),

                    }, "model_nn/model_nn_%s%d.pt" % (current_time, epoch))


        

  0%|          | 2990/3000000 [00:37<9:09:08, 90.96it/s] 

ep_rew_main =  [8.597451e-09]


  0%|          | 3009/3000000 [01:27<906:55:21,  1.09s/it] 

test_rew_main =  1.0161862348865292e-07


  0%|          | 5995/3000000 [02:15<10:01:35, 82.95it/s] 

ep_rew_main =  [5.730241e-09]


  0%|          | 6000/3000000 [03:25<2087:56:17,  2.51s/it]

test_rew_main =  6.196886793914428e-05


  0%|          | 8998/3000000 [04:37<10:30:30, 79.06it/s]  

ep_rew_main =  [4.9229847e-08]


  0%|          | 9000/3000000 [05:44<2428:52:30,  2.92s/it]

test_rew_main =  2.44473897303723e-06


  0%|          | 11993/3000000 [07:01<13:14:13, 62.70it/s] 

ep_rew_main =  [8.902153e-06]


  0%|          | 12000/3000000 [08:09<2430:16:14,  2.93s/it]

test_rew_main =  0.00030100237501047816


  0%|          | 14999/3000000 [09:31<16:39:00, 49.80it/s]  

ep_rew_main =  [0.00031335]


  0%|          | 15000/3000000 [10:44<4036:15:27,  4.87s/it]

test_rew_main =  0.0018866426597890999


  1%|          | 17995/3000000 [12:07<13:24:53, 61.75it/s]  

ep_rew_main =  [1.1483119e-05]


  1%|          | 18000/3000000 [13:16<2702:32:42,  3.26s/it]

test_rew_main =  0.0031980769210294253


  1%|          | 20996/3000000 [14:39<13:54:40, 59.48it/s]  

ep_rew_main =  [2.8387456e-05]


  1%|          | 21000/3000000 [15:48<2842:28:44,  3.44s/it]

test_rew_main =  1.415209907410535e-05


  1%|          | 23997/3000000 [17:11<13:24:57, 61.62it/s]  

ep_rew_main =  [0.00025407]


  1%|          | 24000/3000000 [18:18<2877:35:39,  3.48s/it]

test_rew_main =  1.0732264373358948e-06


  1%|          | 26999/3000000 [19:41<15:02:03, 54.93it/s]  

ep_rew_main =  [1.6191741e-07]


  1%|          | 27000/3000000 [20:41<3318:33:48,  4.02s/it]

test_rew_main =  1.4039673487495904e-07


  1%|          | 29993/3000000 [22:01<13:30:36, 61.07it/s]  

ep_rew_main =  [0.00018366]


  1%|          | 29993/3000000 [22:20<13:30:36, 61.07it/s]

test_rew_main =  2.8848262679035372e-05
current_time =  2022-02-02T22:28:33.827759


  1%|          | 30000/3000000 [24:29<5231:54:44,  6.34s/it]

current_time =  2022-02-02T22:29:56.597569


  1%|          | 32993/3000000 [25:51<12:54:41, 63.83it/s]  

ep_rew_main =  [2.5441335e-07]


  1%|          | 33000/3000000 [26:54<2233:13:56,  2.71s/it]

test_rew_main =  0.00045103840501196506


  1%|          | 35996/3000000 [28:11<12:41:35, 64.86it/s]  

ep_rew_main =  [0.07967763]


  1%|          | 36000/3000000 [29:08<2288:50:47,  2.78s/it]

test_rew_main =  0.0005354601048341747


  1%|▏         | 38993/3000000 [30:21<11:04:44, 74.24it/s]  

ep_rew_main =  [0.05740261]


  1%|▏         | 39000/3000000 [31:18<1674:37:05,  2.04s/it]

test_rew_main =  0.00018165100894398912


  1%|▏         | 41994/3000000 [32:36<12:33:52, 65.40it/s]  

ep_rew_main =  [0.00012279]


  1%|▏         | 42000/3000000 [33:48<2530:42:04,  3.08s/it]

test_rew_main =  0.00010354304610152465


  1%|▏         | 44995/3000000 [35:10<13:15:27, 61.91it/s]  

ep_rew_main =  [4.19085e-09]


  2%|▏         | 45000/3000000 [36:13<2454:15:30,  2.99s/it]

test_rew_main =  0.00012866374579257787


  2%|▏         | 47996/3000000 [37:29<12:01:51, 68.16it/s]  

ep_rew_main =  [3.9289117e-07]


  2%|▏         | 48000/3000000 [38:36<2415:14:40,  2.95s/it]

test_rew_main =  0.0029052286458225575


  2%|▏         | 50998/3000000 [39:55<14:18:15, 57.27it/s]  

ep_rew_main =  [8.3307775e-08]


  2%|▏         | 51000/3000000 [41:03<3354:39:05,  4.10s/it]

test_rew_main =  0.0005822826675302461


  2%|▏         | 53994/3000000 [42:26<15:57:02, 51.30it/s]  

ep_rew_main =  [8.829305e-09]


  2%|▏         | 54000/3000000 [43:28<2545:31:47,  3.11s/it]

test_rew_main =  0.001552992540217076


  2%|▏         | 56994/3000000 [44:46<10:56:42, 74.69it/s]  

ep_rew_main =  [0.0015115]


  2%|▏         | 57000/3000000 [45:43<1813:57:23,  2.22s/it]

test_rew_main =  0.00729416735002385


  2%|▏         | 59999/3000000 [47:01<13:02:11, 62.64it/s]  

ep_rew_main =  [1.1029733e-06]


  2%|▏         | 59999/3000000 [47:12<13:02:11, 62.64it/s]

test_rew_main =  0.0012399734344019067
current_time =  2022-02-02T22:53:24.847299


  2%|▏         | 60000/3000000 [49:18<6442:10:02,  7.89s/it]

current_time =  2022-02-02T22:54:45.411959


  2%|▏         | 62998/3000000 [50:33<11:33:41, 70.56it/s]  

ep_rew_main =  [2.379138e-06]


  2%|▏         | 63000/3000000 [51:26<2126:05:14,  2.61s/it]

test_rew_main =  0.00023574508892115648


  2%|▏         | 65992/3000000 [52:42<11:35:48, 70.28it/s]  

ep_rew_main =  [0.02508348]


  2%|▏         | 66000/3000000 [53:39<1731:08:28,  2.12s/it]

test_rew_main =  0.005089419174714787


  2%|▏         | 68999/3000000 [55:00<13:46:13, 59.12it/s]  

ep_rew_main =  [6.1765306e-05]


  2%|▏         | 69000/3000000 [56:01<2878:42:33,  3.54s/it]

test_rew_main =  0.004422435706805214


  2%|▏         | 71997/3000000 [57:19<9:57:06, 81.73it/s]   

ep_rew_main =  [4.137912e-07]


  2%|▏         | 72000/3000000 [58:10<1640:32:03,  2.02s/it]

test_rew_main =  0.002585790754274145


  2%|▏         | 74998/3000000 [59:32<12:05:17, 67.21it/s]  

ep_rew_main =  [0.0259785]


  2%|▎         | 75000/3000000 [1:00:31<2345:53:57,  2.89s/it]

test_rew_main =  0.00036406530419289603


  3%|▎         | 77993/3000000 [1:01:50<12:24:17, 65.43it/s]  

ep_rew_main =  [0.00040044]


  3%|▎         | 78000/3000000 [1:02:46<1822:19:29,  2.25s/it]

test_rew_main =  0.0008812065043548349


  3%|▎         | 80998/3000000 [1:04:05<14:59:01, 54.11it/s]  

ep_rew_main =  [1.4408273e-06]


  3%|▎         | 81000/3000000 [1:05:02<2939:01:12,  3.62s/it]

test_rew_main =  0.017576851816373914


  3%|▎         | 83999/3000000 [1:06:21<11:41:26, 69.29it/s]  

ep_rew_main =  [0.01903733]


  3%|▎         | 84000/3000000 [1:07:29<2798:07:54,  3.45s/it]

test_rew_main =  0.001585207112951903


  3%|▎         | 86993/3000000 [1:08:54<13:04:21, 61.90it/s]  

ep_rew_main =  [0.00504738]


  3%|▎         | 87000/3000000 [1:10:03<2406:22:14,  2.97s/it]

test_rew_main =  7.62398028397058e-05


  3%|▎         | 89994/3000000 [1:11:28<12:19:23, 65.60it/s]  

ep_rew_main =  [0.00013596]


  3%|▎         | 89994/3000000 [1:11:44<12:19:23, 65.60it/s]

test_rew_main =  0.0016606658947940262
current_time =  2022-02-02T23:18:02.978392


  3%|▎         | 90000/3000000 [1:13:58<5164:06:14,  6.39s/it]

current_time =  2022-02-02T23:19:25.975140


  3%|▎         | 92999/3000000 [1:15:26<14:12:14, 56.85it/s]  

ep_rew_main =  [0.02370111]


  3%|▎         | 93000/3000000 [1:16:36<3767:06:15,  4.67s/it]

test_rew_main =  0.0002575112382087757


  3%|▎         | 95997/3000000 [1:17:57<11:13:21, 71.88it/s]  

ep_rew_main =  [0.00023911]


  3%|▎         | 96000/3000000 [1:18:57<2187:45:04,  2.71s/it]

test_rew_main =  0.0036513376405651215


  3%|▎         | 98996/3000000 [1:20:18<11:19:34, 71.15it/s]  

ep_rew_main =  [0.01322937]


  3%|▎         | 99000/3000000 [1:21:26<2407:50:33,  2.99s/it]

test_rew_main =  0.0003532461935592963


  3%|▎         | 101996/3000000 [1:22:48<12:38:07, 63.71it/s] 

ep_rew_main =  [7.735505e-06]


  3%|▎         | 102000/3000000 [1:23:43<2169:23:31,  2.69s/it]

test_rew_main =  0.009895414166895527


  3%|▎         | 104996/3000000 [1:25:01<11:12:19, 71.77it/s]  

ep_rew_main =  [0.00485979]


  4%|▎         | 105000/3000000 [1:25:54<1881:33:16,  2.34s/it]

test_rew_main =  0.010354430766919644


  4%|▎         | 107996/3000000 [1:27:17<14:52:30, 54.01it/s]  

ep_rew_main =  [3.726849e-05]


  4%|▎         | 108000/3000000 [1:28:16<2646:30:50,  3.29s/it]

test_rew_main =  0.0068665041452367365


  4%|▎         | 110993/3000000 [1:29:38<11:28:53, 69.90it/s]  

ep_rew_main =  [1.1075029e-07]


  4%|▎         | 111000/3000000 [1:30:36<1830:56:00,  2.28s/it]

test_rew_main =  0.00014758158101660785


  4%|▍         | 113995/3000000 [1:31:57<13:58:38, 57.35it/s]  

ep_rew_main =  [0.00243578]


  4%|▍         | 114000/3000000 [1:32:55<2189:34:23,  2.73s/it]

test_rew_main =  0.00016270865094611067


  4%|▍         | 116995/3000000 [1:34:15<13:19:06, 60.13it/s]  

ep_rew_main =  [6.893005e-05]


  4%|▍         | 117000/3000000 [1:35:12<2146:58:58,  2.68s/it]

test_rew_main =  4.878899144713068e-07


  4%|▍         | 119998/3000000 [1:36:33<11:43:13, 68.26it/s]  

ep_rew_main =  [1.0935428e-09]


  4%|▍         | 119998/3000000 [1:36:45<11:43:13, 68.26it/s]

test_rew_main =  2.6252952500721346e-06
current_time =  2022-02-02T23:42:54.604060


  4%|▍         | 120000/3000000 [1:38:46<5153:10:02,  6.44s/it]

current_time =  2022-02-02T23:44:13.872068


  4%|▍         | 122998/3000000 [1:40:07<11:15:32, 70.98it/s]  

ep_rew_main =  [2.5144582e-08]


  4%|▍         | 123000/3000000 [1:41:03<1969:01:10,  2.46s/it]

test_rew_main =  4.178507879082361e-08


  4%|▍         | 125994/3000000 [1:42:25<11:04:17, 72.11it/s]  

ep_rew_main =  [8.236421e-07]


  4%|▍         | 126000/3000000 [1:43:18<1620:29:32,  2.03s/it]

test_rew_main =  0.003713792651541558


  4%|▍         | 128998/3000000 [1:44:43<10:41:34, 74.58it/s]  

ep_rew_main =  [0.01088975]


  4%|▍         | 129000/3000000 [1:45:38<1940:03:02,  2.43s/it]

test_rew_main =  0.002343418499559188


  4%|▍         | 131998/3000000 [1:47:09<11:22:39, 70.02it/s]  

ep_rew_main =  [0.06016265]


  4%|▍         | 132000/3000000 [1:48:01<2033:25:40,  2.55s/it]

test_rew_main =  0.012047960294070457


  4%|▍         | 134996/3000000 [1:49:23<10:40:42, 74.53it/s]  

ep_rew_main =  [0.00104262]


  4%|▍         | 135000/3000000 [1:50:19<1788:58:16,  2.25s/it]

test_rew_main =  0.0006017892733364804


  5%|▍         | 137998/3000000 [1:51:40<10:38:12, 74.74it/s]  

ep_rew_main =  [9.9818026e-05]


  5%|▍         | 138000/3000000 [1:52:34<1867:21:19,  2.35s/it]

test_rew_main =  1.8783170522633784e-07


  5%|▍         | 140994/3000000 [1:53:57<12:29:03, 63.61it/s]  

ep_rew_main =  [0.03405017]


  5%|▍         | 141000/3000000 [1:54:56<2105:37:04,  2.65s/it]

test_rew_main =  0.0014516337601329283


  5%|▍         | 143993/3000000 [1:56:21<12:01:35, 65.97it/s]  

ep_rew_main =  [0.01993808]


  5%|▍         | 144000/3000000 [1:57:25<1999:48:34,  2.52s/it]

test_rew_main =  0.0013980985013153364


  5%|▍         | 146993/3000000 [1:58:52<11:58:00, 66.22it/s]  

ep_rew_main =  [0.13782655]


  5%|▍         | 147000/3000000 [1:59:49<1963:42:42,  2.48s/it]

test_rew_main =  0.0012543156675846874


  5%|▍         | 149999/3000000 [2:01:14<11:11:14, 70.77it/s]  

ep_rew_main =  [4.416694e-05]


  5%|▍         | 149999/3000000 [2:01:27<11:11:14, 70.77it/s]

test_rew_main =  0.047429157091563735
current_time =  2022-02-03T00:07:40.798982


  5%|▌         | 150000/3000000 [2:03:35<5689:26:03,  7.19s/it]

current_time =  2022-02-03T00:09:02.288574


  5%|▌         | 152995/3000000 [2:04:59<11:01:48, 71.70it/s]  

ep_rew_main =  [0.01262472]


  5%|▌         | 153000/3000000 [2:05:55<1781:17:02,  2.25s/it]

test_rew_main =  0.025938860223424953


  5%|▌         | 155992/3000000 [2:07:20<11:56:31, 66.15it/s]  

ep_rew_main =  [0.00840623]


  5%|▌         | 156000/3000000 [2:08:14<1606:11:49,  2.03s/it]

test_rew_main =  0.022195525903354982


  5%|▌         | 158993/3000000 [2:09:40<11:26:53, 68.93it/s]  

ep_rew_main =  [0.00333583]


  5%|▌         | 159000/3000000 [2:10:36<1717:37:42,  2.18s/it]

test_rew_main =  0.014991525797433392


  5%|▌         | 161999/3000000 [2:12:03<11:44:46, 67.11it/s]  

ep_rew_main =  [0.03258107]


  5%|▌         | 162000/3000000 [2:13:04<2562:27:57,  3.25s/it]

test_rew_main =  0.02485282867387472


  5%|▌         | 164992/3000000 [2:14:29<11:05:56, 70.95it/s]  

ep_rew_main =  [0.04742249]


  6%|▌         | 165000/3000000 [2:15:22<1584:56:15,  2.01s/it]

test_rew_main =  0.03727017880819761


  6%|▌         | 167993/3000000 [2:16:48<11:58:19, 65.71it/s]  

ep_rew_main =  [0.1392212]


  6%|▌         | 168000/3000000 [2:17:47<1969:57:46,  2.50s/it]

test_rew_main =  0.04139996278285303


  6%|▌         | 170997/3000000 [2:19:18<11:58:03, 65.66it/s]  

ep_rew_main =  [0.00771281]


  6%|▌         | 171000/3000000 [2:20:15<2343:28:51,  2.98s/it]

test_rew_main =  0.027767413072678392


  6%|▌         | 173998/3000000 [2:21:43<12:06:55, 64.79it/s]  

ep_rew_main =  [0.13431434]


  6%|▌         | 174000/3000000 [2:22:43<2279:27:57,  2.90s/it]

test_rew_main =  0.007877338953666407


  6%|▌         | 176999/3000000 [2:24:13<11:46:51, 66.56it/s]  

ep_rew_main =  [0.10021824]


  6%|▌         | 177000/3000000 [2:25:09<2233:15:37,  2.85s/it]

test_rew_main =  0.0244643019560748


  6%|▌         | 179999/3000000 [2:26:37<11:53:41, 65.85it/s]  

ep_rew_main =  [0.09106527]


  6%|▌         | 179999/3000000 [2:26:49<11:53:41, 65.85it/s]

test_rew_main =  0.04203691113243667
current_time =  2022-02-03T00:33:04.521265


  6%|▌         | 180000/3000000 [2:28:58<5689:03:10,  7.26s/it]

current_time =  2022-02-03T00:34:26.171941


  6%|▌         | 182995/3000000 [2:30:29<11:47:34, 66.35it/s]  

ep_rew_main =  [0.00897413]


  6%|▌         | 183000/3000000 [2:31:29<2098:22:46,  2.68s/it]

test_rew_main =  0.023284742753306477


  6%|▌         | 185999/3000000 [2:33:02<12:02:28, 64.92it/s]  

ep_rew_main =  [0.00645387]


  6%|▌         | 186000/3000000 [2:34:00<2529:29:33,  3.24s/it]

test_rew_main =  0.026249777764612174


  6%|▋         | 188998/3000000 [2:35:31<12:24:41, 62.91it/s]  

ep_rew_main =  [0.0456701]


  6%|▋         | 189000/3000000 [2:36:31<2449:29:21,  3.14s/it]

test_rew_main =  0.11001822909441361


  6%|▋         | 191992/3000000 [2:38:03<12:00:42, 64.94it/s]  

ep_rew_main =  [0.02091077]


  6%|▋         | 192000/3000000 [2:39:01<1695:47:18,  2.17s/it]

test_rew_main =  0.026394250992162138


  6%|▋         | 194999/3000000 [2:40:32<11:45:22, 66.28it/s]  

ep_rew_main =  [0.01241282]


  6%|▋         | 195000/3000000 [2:41:30<2315:31:36,  2.97s/it]

test_rew_main =  0.010760363897714809


  7%|▋         | 197998/3000000 [2:43:04<12:22:18, 62.91it/s]  

ep_rew_main =  [0.03469228]


  7%|▋         | 198000/3000000 [2:44:01<2364:16:52,  3.04s/it]

test_rew_main =  0.07401349962177366


  7%|▋         | 200994/3000000 [2:45:33<12:09:14, 63.97it/s]  

ep_rew_main =  [0.01375365]


  7%|▋         | 201000/3000000 [2:46:31<2021:21:21,  2.60s/it]

test_rew_main =  0.015090806704884895


  7%|▋         | 203995/3000000 [2:48:06<12:24:35, 62.59it/s]  

ep_rew_main =  [0.00431916]


  7%|▋         | 204000/3000000 [2:49:02<1978:35:35,  2.55s/it]

test_rew_main =  0.03656811991240109


  7%|▋         | 206993/3000000 [2:50:35<11:24:10, 68.04it/s]  

ep_rew_main =  [0.20688571]


  7%|▋         | 207000/3000000 [2:51:34<1785:02:49,  2.30s/it]

test_rew_main =  0.05181840960216825


  7%|▋         | 209996/3000000 [2:53:09<11:54:08, 65.11it/s]  

ep_rew_main =  [0.117553]


  7%|▋         | 209996/3000000 [2:53:21<11:54:08, 65.11it/s]

test_rew_main =  0.05132262997898361
current_time =  2022-02-03T00:59:35.289910


  7%|▋         | 210000/3000000 [2:55:30<5162:59:54,  6.66s/it]

current_time =  2022-02-03T01:00:57.346426


  7%|▋         | 212992/3000000 [2:57:03<10:41:53, 72.37it/s]  

ep_rew_main =  [0.04896966]


  7%|▋         | 213000/3000000 [2:58:03<1618:57:15,  2.09s/it]

test_rew_main =  0.08666980052537127


  7%|▋         | 215992/3000000 [2:59:39<11:14:28, 68.79it/s]  

ep_rew_main =  [1.6487232e-09]


  7%|▋         | 216000/3000000 [3:00:38<1726:05:49,  2.23s/it]

test_rew_main =  0.04057984267565689


  7%|▋         | 218993/3000000 [3:02:15<11:50:21, 65.25it/s]  

ep_rew_main =  [0.39869756]


  7%|▋         | 219000/3000000 [3:03:15<1830:19:10,  2.37s/it]

test_rew_main =  0.01774532101350139


  7%|▋         | 221994/3000000 [3:04:52<11:56:19, 64.64it/s]  

ep_rew_main =  [0.20207442]


  7%|▋         | 222000/3000000 [3:05:48<1954:29:08,  2.53s/it]

test_rew_main =  0.028727827262465967


  7%|▋         | 224992/3000000 [3:07:24<10:55:33, 70.55it/s]  

ep_rew_main =  [0.0113766]


  8%|▊         | 225000/3000000 [3:08:23<1645:36:58,  2.13s/it]

test_rew_main =  0.21128503216163336


  8%|▊         | 227994/3000000 [3:10:01<12:50:20, 59.97it/s]  

ep_rew_main =  [0.00081013]


  8%|▊         | 228000/3000000 [3:10:59<2026:17:41,  2.63s/it]

test_rew_main =  0.044063914827224236


  8%|▊         | 230995/3000000 [3:12:34<11:28:58, 66.98it/s]  

ep_rew_main =  [3.4733262e-06]


  8%|▊         | 231000/3000000 [3:13:34<1776:11:58,  2.31s/it]

test_rew_main =  0.026252535102151177


  8%|▊         | 233995/3000000 [3:15:10<11:51:33, 64.79it/s]  

ep_rew_main =  [0.15044864]


  8%|▊         | 234000/3000000 [3:16:09<2049:35:39,  2.67s/it]

test_rew_main =  0.1425331898986441


  8%|▊         | 236993/3000000 [3:17:48<11:19:08, 67.81it/s]  

ep_rew_main =  [5.4162323e-05]


  8%|▊         | 237000/3000000 [3:18:46<1725:53:41,  2.25s/it]

test_rew_main =  0.07407103475986002


  8%|▊         | 239996/3000000 [3:20:25<11:06:46, 68.99it/s]  

ep_rew_main =  [0.22641759]


  8%|▊         | 239996/3000000 [3:20:43<11:06:46, 68.99it/s]

test_rew_main =  0.06773289534538311
current_time =  2022-02-03T01:26:52.151533


  8%|▊         | 240000/3000000 [3:22:47<4821:44:00,  6.29s/it]

current_time =  2022-02-03T01:28:14.663137


  8%|▊         | 242997/3000000 [3:24:28<12:06:45, 63.23it/s]  

ep_rew_main =  [0.2695197]


  8%|▊         | 243000/3000000 [3:25:25<2226:30:36,  2.91s/it]

test_rew_main =  0.037728201842881094


  8%|▊         | 245993/3000000 [3:27:06<11:42:38, 65.33it/s]  

ep_rew_main =  [0.21198255]


  8%|▊         | 246000/3000000 [3:28:04<1770:16:10,  2.31s/it]

test_rew_main =  0.024646577201989338


  8%|▊         | 248997/3000000 [3:29:44<12:20:28, 61.92it/s]  

ep_rew_main =  [0.16508512]


  8%|▊         | 249000/3000000 [3:30:46<2443:56:54,  3.20s/it]

test_rew_main =  0.09989105502922568


  8%|▊         | 251997/3000000 [3:32:26<11:39:51, 65.44it/s]  

ep_rew_main =  [0.20278107]


  8%|▊         | 252000/3000000 [3:33:21<1999:01:02,  2.62s/it]

test_rew_main =  0.08008525115858037


  8%|▊         | 254999/3000000 [3:35:03<12:05:52, 63.03it/s]  

ep_rew_main =  [0.10200661]


  8%|▊         | 255000/3000000 [3:36:01<2235:41:01,  2.93s/it]

test_rew_main =  0.05280843033810312


  9%|▊         | 257997/3000000 [3:37:39<11:34:04, 65.84it/s]  

ep_rew_main =  [1.7988428e-06]


  9%|▊         | 258000/3000000 [3:38:36<1985:40:38,  2.61s/it]

test_rew_main =  0.06939393587457705


  9%|▊         | 260992/3000000 [3:40:16<12:04:18, 63.03it/s]  

ep_rew_main =  [0.2522321]


  9%|▊         | 261000/3000000 [3:41:16<1772:02:01,  2.33s/it]

test_rew_main =  0.061748559338058354


  9%|▉         | 263992/3000000 [3:42:57<10:09:01, 74.87it/s]  

ep_rew_main =  [3.4958513e-05]


  9%|▉         | 264000/3000000 [3:43:54<1485:15:52,  1.95s/it]

test_rew_main =  0.01951107724904703


  9%|▉         | 266992/3000000 [3:45:34<11:20:06, 66.97it/s]  

ep_rew_main =  [0.01770785]


  9%|▉         | 267000/3000000 [3:46:32<1654:22:14,  2.18s/it]

test_rew_main =  0.04459487750780003


  9%|▉         | 269997/3000000 [3:48:15<11:38:16, 65.16it/s]  

ep_rew_main =  [0.0530229]


  9%|▉         | 269997/3000000 [3:48:35<11:38:16, 65.16it/s]

test_rew_main =  0.09201602831394509
current_time =  2022-02-03T01:54:38.868893


  9%|▉         | 270000/3000000 [3:50:34<4962:10:08,  6.54s/it]

current_time =  2022-02-03T01:56:01.602235


  9%|▉         | 272997/3000000 [3:52:18<12:09:11, 62.33it/s]  

ep_rew_main =  [0.10464182]


  9%|▉         | 273000/3000000 [3:53:17<2312:36:46,  3.05s/it]

test_rew_main =  0.042947625965014904


  9%|▉         | 275999/3000000 [3:54:58<10:59:47, 68.81it/s]  

ep_rew_main =  [0.00429216]


  9%|▉         | 276000/3000000 [3:55:52<2044:30:40,  2.70s/it]

test_rew_main =  0.03416289218121221


  9%|▉         | 278997/3000000 [3:57:41<14:01:48, 53.87it/s]  

ep_rew_main =  [1.625993e-05]


  9%|▉         | 279000/3000000 [3:58:32<2310:25:40,  3.06s/it]

test_rew_main =  0.010456169536557433


  9%|▉         | 281997/3000000 [4:00:17<13:00:54, 58.01it/s]  

ep_rew_main =  [1.1381096e-05]


  9%|▉         | 282000/3000000 [4:01:17<2417:13:44,  3.20s/it]

test_rew_main =  0.03748885466795111


  9%|▉         | 284996/3000000 [4:03:02<11:18:32, 66.69it/s]  

ep_rew_main =  [0.30116948]


 10%|▉         | 285000/3000000 [4:03:54<1760:23:45,  2.33s/it]

test_rew_main =  0.13496694575235213


 10%|▉         | 287991/3000000 [4:05:39<10:21:13, 72.76it/s]  

ep_rew_main =  [0.04484585]


 10%|▉         | 288000/3000000 [4:06:30<1267:05:42,  1.68s/it]

test_rew_main =  0.09120444416765099


 10%|▉         | 290999/3000000 [4:08:17<11:38:36, 64.63it/s]  

ep_rew_main =  [0.00112149]


 10%|▉         | 291000/3000000 [4:09:16<2555:38:17,  3.40s/it]

test_rew_main =  0.08164130699370423


 10%|▉         | 293996/3000000 [4:11:03<11:39:39, 64.46it/s]  

ep_rew_main =  [0.50963986]


 10%|▉         | 294000/3000000 [4:12:00<1966:54:40,  2.62s/it]

test_rew_main =  0.13281325746058661


 10%|▉         | 296996/3000000 [4:13:48<11:30:30, 65.24it/s]  

ep_rew_main =  [0.37883458]


 10%|▉         | 297000/3000000 [4:14:46<1927:32:57,  2.57s/it]

test_rew_main =  0.019477838688676235


 10%|▉         | 299995/3000000 [4:16:34<12:43:51, 58.91it/s]  

ep_rew_main =  [0.16942252]


 10%|▉         | 299995/3000000 [4:16:46<12:43:51, 58.91it/s]

test_rew_main =  0.09278287406814789
current_time =  2022-02-03T02:23:00.009744


 10%|█         | 300000/3000000 [4:18:54<4928:44:52,  6.57s/it]

current_time =  2022-02-03T02:24:21.518130


 10%|█         | 302995/3000000 [4:20:42<11:16:17, 66.46it/s]  

ep_rew_main =  [0.34818545]


 10%|█         | 303000/3000000 [4:21:40<1860:18:53,  2.48s/it]

test_rew_main =  0.024981039444432897


 10%|█         | 305995/3000000 [4:23:30<11:13:32, 66.66it/s]  

ep_rew_main =  [0.12814154]


 10%|█         | 306000/3000000 [4:24:25<1730:11:06,  2.31s/it]

test_rew_main =  0.1497228215758691


 10%|█         | 308993/3000000 [4:26:16<12:13:47, 61.12it/s]  

ep_rew_main =  [2.373659e-10]


 10%|█         | 309000/3000000 [4:27:19<2021:59:56,  2.71s/it]

test_rew_main =  0.003541726758186376


 10%|█         | 311998/3000000 [4:29:09<11:37:13, 64.25it/s]  

ep_rew_main =  [0.16000095]


 10%|█         | 312000/3000000 [4:30:06<2070:12:10,  2.77s/it]

test_rew_main =  0.052402061044795445


 10%|█         | 314998/3000000 [4:32:02<12:03:07, 61.88it/s]  

ep_rew_main =  [0.33094332]


 10%|█         | 315000/3000000 [4:32:58<2078:45:02,  2.79s/it]

test_rew_main =  0.15252441172847292


 11%|█         | 317995/3000000 [4:34:52<13:30:35, 55.14it/s]  

ep_rew_main =  [0.00139498]


 11%|█         | 318000/3000000 [4:35:51<2115:51:45,  2.84s/it]

test_rew_main =  0.03667827590872647


 11%|█         | 320995/3000000 [4:37:41<10:46:30, 69.06it/s]  

ep_rew_main =  [0.00045809]


 11%|█         | 321000/3000000 [4:38:35<1645:52:12,  2.21s/it]

test_rew_main =  7.075413297199486e-05


 11%|█         | 323994/3000000 [4:40:26<11:09:13, 66.65it/s]  

ep_rew_main =  [0.00010913]


 11%|█         | 324000/3000000 [4:41:21<1639:24:22,  2.21s/it]

test_rew_main =  0.0037732769769703517


 11%|█         | 326993/3000000 [4:43:11<11:36:04, 64.00it/s]  

ep_rew_main =  [0.2596528]


 11%|█         | 327000/3000000 [4:44:10<1721:28:31,  2.32s/it]

test_rew_main =  0.013770659756678164


 11%|█         | 329997/3000000 [4:46:05<12:57:33, 57.23it/s]  

ep_rew_main =  [1.4421185e-07]


 11%|█         | 329997/3000000 [4:46:18<12:57:33, 57.23it/s]

test_rew_main =  0.010824550626253626
current_time =  2022-02-03T02:52:31.525734


 11%|█         | 330000/3000000 [4:48:27<6145:45:53,  8.29s/it]

current_time =  2022-02-03T02:53:54.428898


 11%|█         | 332996/3000000 [4:50:21<11:50:08, 62.59it/s]  

ep_rew_main =  [0.12799829]


 11%|█         | 333000/3000000 [4:51:20<2115:07:12,  2.86s/it]

test_rew_main =  0.05251277443715276


 11%|█         | 335999/3000000 [4:53:11<10:25:23, 70.99it/s]  

ep_rew_main =  [0.00178951]


 11%|█         | 336000/3000000 [4:54:16<2148:33:19,  2.90s/it]

test_rew_main =  0.03579143868656189


 11%|█▏        | 338994/3000000 [4:56:11<13:04:32, 56.53it/s]  

ep_rew_main =  [1.18242795e-07]


 11%|█▏        | 339000/3000000 [4:57:13<2290:43:14,  3.10s/it]

test_rew_main =  0.005815085823970226


 11%|█▏        | 341999/3000000 [4:59:08<12:02:26, 61.32it/s]  

ep_rew_main =  [0.00186569]


 11%|█▏        | 342000/3000000 [5:00:08<2418:56:10,  3.28s/it]

test_rew_main =  0.04449277711695453


 11%|█▏        | 344993/3000000 [5:02:03<11:34:53, 63.68it/s]  

ep_rew_main =  [0.08948574]


 12%|█▏        | 345000/3000000 [5:02:58<1591:57:29,  2.16s/it]

test_rew_main =  0.003936674127621429


 12%|█▏        | 347995/3000000 [5:04:54<13:54:25, 52.97it/s]  

ep_rew_main =  [0.02315201]


 12%|█▏        | 348000/3000000 [5:05:50<2192:49:35,  2.98s/it]

test_rew_main =  0.0069157382310357


 12%|█▏        | 350997/3000000 [5:07:48<11:57:12, 61.56it/s]  

ep_rew_main =  [0.12482032]


 12%|█▏        | 351000/3000000 [5:08:44<2164:57:12,  2.94s/it]

test_rew_main =  0.037521304781295894


 12%|█▏        | 353992/3000000 [5:10:41<10:23:20, 70.75it/s]  

ep_rew_main =  [0.00136795]


 12%|█▏        | 354000/3000000 [5:11:38<1447:14:49,  1.97s/it]

test_rew_main =  0.05203108310793394


 12%|█▏        | 356992/3000000 [5:13:33<11:08:47, 65.87it/s]  

ep_rew_main =  [1.6016123e-05]


 12%|█▏        | 357000/3000000 [5:14:32<1639:15:15,  2.23s/it]

test_rew_main =  0.10058540465982968


 12%|█▏        | 359998/3000000 [5:16:26<10:39:47, 68.77it/s]  

ep_rew_main =  [1.527977e-07]


 12%|█▏        | 359998/3000000 [5:16:39<10:39:47, 68.77it/s]

test_rew_main =  0.0860307237793765
current_time =  2022-02-03T03:22:50.494656


 12%|█▏        | 360000/3000000 [5:18:44<4385:03:33,  5.98s/it]

current_time =  2022-02-03T03:24:11.262741


 12%|█▏        | 362999/3000000 [5:20:41<11:33:56, 63.33it/s]  

ep_rew_main =  [0.26233295]


 12%|█▏        | 363000/3000000 [5:21:37<2117:30:34,  2.89s/it]

test_rew_main =  0.06027660883551593


 12%|█▏        | 365999/3000000 [5:23:39<14:06:59, 51.83it/s]  

ep_rew_main =  [0.01827315]


 12%|█▏        | 366000/3000000 [5:24:37<2842:35:42,  3.89s/it]

test_rew_main =  0.01806963357662308


 12%|█▏        | 368996/3000000 [5:26:35<11:14:41, 64.99it/s]  

ep_rew_main =  [0.3025106]


 12%|█▏        | 369000/3000000 [5:27:40<2115:54:15,  2.90s/it]

test_rew_main =  0.03201785419309532


 12%|█▏        | 371995/3000000 [5:29:41<11:32:03, 63.29it/s]  

ep_rew_main =  [0.12290753]


 12%|█▏        | 372000/3000000 [5:30:39<2008:11:48,  2.75s/it]

test_rew_main =  0.028620147232095515


 12%|█▏        | 374994/3000000 [5:32:40<11:25:33, 63.82it/s]  

ep_rew_main =  [0.339482]


 12%|█▎        | 375000/3000000 [5:33:44<1934:28:25,  2.65s/it]

test_rew_main =  0.047107549622803384


 13%|█▎        | 377998/3000000 [5:35:44<11:15:59, 64.65it/s]  

ep_rew_main =  [1.9616535e-07]


 13%|█▎        | 378000/3000000 [5:36:44<2143:17:43,  2.94s/it]

test_rew_main =  0.03152044658297228


 13%|█▎        | 380996/3000000 [5:38:44<11:37:55, 62.54it/s]  

ep_rew_main =  [0.05121677]


 13%|█▎        | 381000/3000000 [5:39:38<1889:41:42,  2.60s/it]

test_rew_main =  0.0878912554620069


 13%|█▎        | 383997/3000000 [5:41:36<11:10:09, 65.06it/s]  

ep_rew_main =  [0.18563373]


 13%|█▎        | 384000/3000000 [5:42:32<1920:57:11,  2.64s/it]

test_rew_main =  0.00893558972314057


 13%|█▎        | 386995/3000000 [5:44:35<11:14:38, 64.55it/s]  

ep_rew_main =  [0.35263237]


 13%|█▎        | 387000/3000000 [5:45:41<2047:06:32,  2.82s/it]

test_rew_main =  0.15081335196240506


 13%|█▎        | 389996/3000000 [5:47:50<10:34:17, 68.58it/s]  

ep_rew_main =  [0.26363227]


 13%|█▎        | 389996/3000000 [5:48:02<10:34:17, 68.58it/s]

test_rew_main =  0.08355114672489268
current_time =  2022-02-03T03:54:15.431531


 13%|█▎        | 390000/3000000 [5:50:09<4055:07:52,  5.59s/it]

current_time =  2022-02-03T03:55:36.464630


 13%|█▎        | 392994/3000000 [5:52:13<10:32:58, 68.64it/s]  

ep_rew_main =  [0.00100724]


 13%|█▎        | 393000/3000000 [5:53:14<1734:24:57,  2.40s/it]

test_rew_main =  0.1508407459614532


 13%|█▎        | 395992/3000000 [5:55:17<11:31:10, 62.79it/s]  

ep_rew_main =  [0.4299707]


 13%|█▎        | 396000/3000000 [5:56:25<1833:54:53,  2.54s/it]

test_rew_main =  0.13505305099177659


 13%|█▎        | 398996/3000000 [5:58:31<12:46:49, 56.53it/s]  

ep_rew_main =  [4.2278163e-08]


 13%|█▎        | 399000/3000000 [5:59:30<2109:17:29,  2.92s/it]

test_rew_main =  0.17610831510367705


 13%|█▎        | 401994/3000000 [6:01:35<10:54:17, 66.18it/s]  

ep_rew_main =  [0.00378981]


 13%|█▎        | 402000/3000000 [6:02:37<1672:41:44,  2.32s/it]

test_rew_main =  0.00410595813354519


 13%|█▎        | 404995/3000000 [6:04:45<11:26:39, 62.99it/s]  

ep_rew_main =  [0.35819447]


 14%|█▎        | 405000/3000000 [6:05:42<1870:30:29,  2.59s/it]

test_rew_main =  0.05844004881588226


 14%|█▎        | 407997/3000000 [6:07:47<11:15:26, 63.96it/s]  

ep_rew_main =  [0.22285874]


 14%|█▎        | 408000/3000000 [6:08:41<1805:40:44,  2.51s/it]

test_rew_main =  0.25013002316777605


 14%|█▎        | 410997/3000000 [6:10:48<14:09:29, 50.79it/s]  

ep_rew_main =  [0.00615185]


 14%|█▎        | 411000/3000000 [6:11:51<2680:03:41,  3.73s/it]

test_rew_main =  0.18366807798374435


 14%|█▍        | 413994/3000000 [6:13:59<11:22:28, 63.15it/s]  

ep_rew_main =  [0.09709208]


 14%|█▍        | 414000/3000000 [6:14:58<1876:13:48,  2.61s/it]

test_rew_main =  0.016604760963731095


 14%|█▍        | 416998/3000000 [6:17:03<10:55:38, 65.66it/s]  

ep_rew_main =  [0.01001744]


 14%|█▍        | 417000/3000000 [6:18:01<1822:29:41,  2.54s/it]

test_rew_main =  0.19592830794418864


 14%|█▍        | 419999/3000000 [6:20:09<11:16:14, 63.59it/s]  

ep_rew_main =  [0.3195118]


 14%|█▍        | 419999/3000000 [6:20:25<11:16:14, 63.59it/s]

test_rew_main =  0.049841637442215134
current_time =  2022-02-03T04:26:32.459378


 14%|█▍        | 420000/3000000 [6:22:26<5012:02:56,  6.99s/it]

current_time =  2022-02-03T04:27:54.020902


 14%|█▍        | 422995/3000000 [6:24:35<14:38:09, 48.91it/s]  

ep_rew_main =  [9.262924e-07]


 14%|█▍        | 423000/3000000 [6:25:33<2203:01:47,  3.08s/it]

test_rew_main =  0.10044103759329585


 14%|█▍        | 425998/3000000 [6:27:39<10:17:58, 69.42it/s]  

ep_rew_main =  [0.00274941]


 14%|█▍        | 426000/3000000 [6:28:36<1987:56:34,  2.78s/it]

test_rew_main =  0.1542389385156907


 14%|█▍        | 428995/3000000 [6:30:47<11:10:06, 63.95it/s]  

ep_rew_main =  [0.37832698]


 14%|█▍        | 429000/3000000 [6:31:43<1725:46:18,  2.42s/it]

test_rew_main =  0.08290019156215746


 14%|█▍        | 431995/3000000 [6:33:54<11:22:28, 62.71it/s]  

ep_rew_main =  [0.23666392]


 14%|█▍        | 432000/3000000 [6:34:51<1833:58:16,  2.57s/it]

test_rew_main =  0.04714134135649779


 14%|█▍        | 434994/3000000 [6:37:06<11:20:26, 62.83it/s]  

ep_rew_main =  [0.09278177]


 14%|█▍        | 435000/3000000 [6:38:04<1703:19:07,  2.39s/it]

test_rew_main =  0.031380912713373545


 15%|█▍        | 437993/3000000 [6:40:14<11:20:19, 62.76it/s]  

ep_rew_main =  [0.05808852]


 15%|█▍        | 438000/3000000 [6:41:15<1815:26:23,  2.55s/it]

test_rew_main =  0.073595319988449


 15%|█▍        | 440999/3000000 [6:43:27<11:24:21, 62.32it/s]  

ep_rew_main =  [0.18773267]


 15%|█▍        | 441000/3000000 [6:44:25<2099:52:41,  2.95s/it]

test_rew_main =  0.05728282930853044


 15%|█▍        | 443994/3000000 [6:46:39<11:32:52, 61.48it/s]  

ep_rew_main =  [9.831435e-06]


 15%|█▍        | 444000/3000000 [6:47:36<1661:34:49,  2.34s/it]

test_rew_main =  0.03308703490101721


 15%|█▍        | 446997/3000000 [6:49:47<11:27:39, 61.88it/s]  

ep_rew_main =  [0.0871871]


 15%|█▍        | 447000/3000000 [6:50:43<1839:34:44,  2.59s/it]

test_rew_main =  0.023186103451760372


 15%|█▍        | 449998/3000000 [6:52:53<11:09:33, 63.47it/s]  

ep_rew_main =  [0.02462333]


 15%|█▍        | 449998/3000000 [6:53:09<11:09:33, 63.47it/s]

test_rew_main =  0.15492807495018557
current_time =  2022-02-03T04:59:27.536666


 15%|█▌        | 450000/3000000 [6:55:20<5120:32:26,  7.23s/it]

current_time =  2022-02-03T05:00:47.446541


 15%|█▌        | 452994/3000000 [6:57:35<11:13:45, 63.00it/s]  

ep_rew_main =  [0.41327825]


 15%|█▌        | 453000/3000000 [6:58:34<1714:24:07,  2.42s/it]

test_rew_main =  0.11359183929185386


 15%|█▌        | 455995/3000000 [7:00:47<11:17:55, 62.54it/s]  

ep_rew_main =  [0.21677856]


 15%|█▌        | 456000/3000000 [7:01:46<1937:19:10,  2.74s/it]

test_rew_main =  0.1298284011843271


 15%|█▌        | 458994/3000000 [7:04:00<11:28:04, 61.55it/s]  

ep_rew_main =  [0.3385278]


 15%|█▌        | 459000/3000000 [7:05:01<1916:10:27,  2.71s/it]

test_rew_main =  0.24039464780810116


 15%|█▌        | 461999/3000000 [7:07:13<10:27:15, 67.44it/s]  

ep_rew_main =  [0.01206308]


 15%|█▌        | 462000/3000000 [7:08:14<2202:43:26,  3.12s/it]

test_rew_main =  0.09932794049699131


 15%|█▌        | 464994/3000000 [7:10:33<10:56:33, 64.35it/s]  

ep_rew_main =  [0.581923]


 16%|█▌        | 465000/3000000 [7:11:31<1669:26:15,  2.37s/it]

test_rew_main =  0.3084882391405333


 16%|█▌        | 467997/3000000 [7:13:45<10:59:48, 63.96it/s]  

ep_rew_main =  [0.16201402]


 16%|█▌        | 468000/3000000 [7:14:40<1986:34:25,  2.82s/it]

test_rew_main =  0.1618395894608074


 16%|█▌        | 470998/3000000 [7:16:54<11:04:44, 63.41it/s]  

ep_rew_main =  [0.20619465]


 16%|█▌        | 471000/3000000 [7:17:50<1898:00:56,  2.70s/it]

test_rew_main =  0.11379191931139158


 16%|█▌        | 473994/3000000 [7:20:05<11:22:17, 61.70it/s]  

ep_rew_main =  [0.11160808]


 16%|█▌        | 474000/3000000 [7:21:02<1641:37:40,  2.34s/it]

test_rew_main =  0.12855881047602802


 16%|█▌        | 476997/3000000 [7:23:17<11:18:25, 61.98it/s]  

ep_rew_main =  [0.32629082]


 16%|█▌        | 477000/3000000 [7:24:16<1925:38:47,  2.75s/it]

test_rew_main =  0.018361650037475398


 16%|█▌        | 479999/3000000 [7:26:36<12:44:51, 54.91it/s]  

ep_rew_main =  [0.00562548]


 16%|█▌        | 479999/3000000 [7:26:52<12:44:51, 54.91it/s]

test_rew_main =  0.22385022018937714
current_time =  2022-02-03T05:33:07.492646


 16%|█▌        | 480000/3000000 [7:29:01<5892:42:14,  8.42s/it]

current_time =  2022-02-03T05:34:28.719754


 16%|█▌        | 482996/3000000 [7:31:23<13:50:23, 50.52it/s]  

ep_rew_main =  [0.00152841]


 16%|█▌        | 483000/3000000 [7:32:20<2244:55:18,  3.21s/it]

test_rew_main =  0.09789484733000198


 16%|█▌        | 485998/3000000 [7:34:39<12:51:19, 54.32it/s]  

ep_rew_main =  [0.00211384]


 16%|█▌        | 486000/3000000 [7:35:40<2499:21:20,  3.58s/it]

test_rew_main =  0.16858949058922418


 16%|█▋        | 488993/3000000 [7:38:00<11:23:34, 61.22it/s]  

ep_rew_main =  [0.2286536]


 16%|█▋        | 489000/3000000 [7:39:07<1843:21:55,  2.64s/it]

test_rew_main =  0.10254606147062591


 16%|█▋        | 491998/3000000 [7:41:25<11:08:49, 62.50it/s]  

ep_rew_main =  [0.22733487]


 16%|█▋        | 492000/3000000 [7:42:19<1846:51:15,  2.65s/it]

test_rew_main =  0.07012806908262814


 16%|█▋        | 494995/3000000 [7:44:44<13:12:45, 52.66it/s]  

ep_rew_main =  [5.2148126e-09]


 16%|█▋        | 495000/3000000 [7:45:46<2270:42:33,  3.26s/it]

test_rew_main =  0.03753379662181189


 17%|█▋        | 497999/3000000 [7:48:05<11:24:42, 60.90it/s]  

ep_rew_main =  [0.41770795]


 17%|█▋        | 498000/3000000 [7:49:08<2516:14:16,  3.62s/it]

test_rew_main =  0.07402833233681724


 17%|█▋        | 500994/3000000 [7:51:29<12:05:52, 57.38it/s]  

ep_rew_main =  [9.5504575e-08]


 17%|█▋        | 501000/3000000 [7:52:24<1723:32:44,  2.48s/it]

test_rew_main =  0.05032285381702922


 17%|█▋        | 503997/3000000 [7:54:47<10:58:23, 63.18it/s]  

ep_rew_main =  [0.00803701]


 17%|█▋        | 504000/3000000 [7:55:45<1866:39:02,  2.69s/it]

test_rew_main =  0.11766326946184498


 17%|█▋        | 506994/3000000 [7:58:08<11:13:46, 61.67it/s]  

ep_rew_main =  [0.26483935]


 17%|█▋        | 507000/3000000 [7:59:06<1638:57:54,  2.37s/it]

test_rew_main =  0.09893647341713593


 17%|█▋        | 509992/3000000 [8:01:26<10:10:53, 67.93it/s]  

ep_rew_main =  [9.2417205e-05]


 17%|█▋        | 509992/3000000 [8:01:44<10:10:53, 67.93it/s]

test_rew_main =  0.17934520578428517
current_time =  2022-02-03T06:07:50.914041


 17%|█▋        | 510000/3000000 [8:03:44<3435:30:36,  4.97s/it]

current_time =  2022-02-03T06:09:12.177602


 17%|█▋        | 512997/3000000 [8:06:07<11:20:40, 60.90it/s]  

ep_rew_main =  [0.02829485]


 17%|█▋        | 513000/3000000 [8:07:07<2175:08:46,  3.15s/it]

test_rew_main =  0.12330611246252712


 17%|█▋        | 515998/3000000 [8:09:32<11:25:27, 60.40it/s]  

ep_rew_main =  [0.0621952]


 17%|█▋        | 516000/3000000 [8:10:29<1955:13:57,  2.83s/it]

test_rew_main =  0.0760925694738852


 17%|█▋        | 518998/3000000 [8:12:49<10:35:53, 65.03it/s]  

ep_rew_main =  [0.01633278]


 17%|█▋        | 519000/3000000 [8:13:50<1948:13:53,  2.83s/it]

test_rew_main =  0.26783542323815535


 17%|█▋        | 521993/3000000 [8:16:18<11:32:27, 59.64it/s]  

ep_rew_main =  [2.0856264e-07]


 17%|█▋        | 522000/3000000 [8:17:12<1599:10:24,  2.32s/it]

test_rew_main =  0.13318309462682248


 17%|█▋        | 524994/3000000 [8:19:34<11:12:53, 61.30it/s]  

ep_rew_main =  [0.02706273]


 18%|█▊        | 525000/3000000 [8:20:34<1654:40:37,  2.41s/it]

test_rew_main =  0.12285965749910999


 18%|█▊        | 527999/3000000 [8:23:00<11:50:25, 57.99it/s]  

ep_rew_main =  [0.00485171]


 18%|█▊        | 528000/3000000 [8:24:01<2459:16:00,  3.58s/it]

test_rew_main =  0.020394781883453828


 18%|█▊        | 530997/3000000 [8:26:27<10:50:14, 63.28it/s]  

ep_rew_main =  [0.00215231]


 18%|█▊        | 531000/3000000 [8:27:28<1932:25:26,  2.82s/it]

test_rew_main =  0.06420993533341972


 18%|█▊        | 533995/3000000 [8:30:01<12:28:03, 54.94it/s]  

ep_rew_main =  [3.19611e-08]


 18%|█▊        | 534000/3000000 [8:30:58<2090:00:56,  3.05s/it]

test_rew_main =  0.004639562911025954


 18%|█▊        | 536994/3000000 [8:33:29<12:11:51, 56.09it/s]  

ep_rew_main =  [6.030894e-06]


 18%|█▊        | 537000/3000000 [8:34:29<1991:47:26,  2.91s/it]

test_rew_main =  0.00353902998523309


 18%|█▊        | 539998/3000000 [8:36:59<11:01:54, 61.94it/s]  

ep_rew_main =  [0.2168202]


 18%|█▊        | 539998/3000000 [8:37:16<11:01:54, 61.94it/s]

test_rew_main =  0.050705902819078975
current_time =  2022-02-03T06:43:28.805212


 18%|█▊        | 540000/3000000 [8:39:22<4760:58:58,  6.97s/it]

current_time =  2022-02-03T06:44:50.018760


 18%|█▊        | 542993/3000000 [8:41:47<11:22:15, 60.02it/s]  

ep_rew_main =  [0.4223481]


 18%|█▊        | 543000/3000000 [8:42:46<1597:41:11,  2.34s/it]

test_rew_main =  0.09327779112767312


 18%|█▊        | 545998/3000000 [8:45:15<11:00:32, 61.92it/s]  

ep_rew_main =  [0.00325153]


 18%|█▊        | 546000/3000000 [8:46:23<2265:25:38,  3.32s/it]

test_rew_main =  3.1918527870841326e-05


 18%|█▊        | 548992/3000000 [8:48:53<10:55:46, 62.29it/s]  

ep_rew_main =  [1.0592024e-06]


 18%|█▊        | 549000/3000000 [8:49:59<1668:58:51,  2.45s/it]

test_rew_main =  0.06416118708218238


 18%|█▊        | 551997/3000000 [8:52:31<10:43:53, 63.37it/s]  

ep_rew_main =  [0.01185853]


 18%|█▊        | 552000/3000000 [8:53:39<2091:42:23,  3.08s/it]

test_rew_main =  0.00014024540259210727


 18%|█▊        | 554999/3000000 [8:56:11<10:53:27, 62.36it/s]  

ep_rew_main =  [0.01344162]


 18%|█▊        | 555000/3000000 [8:57:12<2035:05:15,  3.00s/it]

test_rew_main =  0.014206909126200781


 19%|█▊        | 557996/3000000 [8:59:47<13:01:03, 52.11it/s]  

ep_rew_main =  [0.00677679]


 19%|█▊        | 558000/3000000 [9:00:54<2543:48:09,  3.75s/it]

test_rew_main =  0.010716592336453109


 19%|█▊        | 560995/3000000 [9:03:27<11:51:30, 57.13it/s]  

ep_rew_main =  [8.5049915e-06]


 19%|█▊        | 561000/3000000 [9:04:30<2021:22:16,  2.98s/it]

test_rew_main =  0.016774099024127215


 19%|█▉        | 563997/3000000 [9:07:05<12:18:11, 55.00it/s]  

ep_rew_main =  [0.00580641]


 19%|█▉        | 564000/3000000 [9:08:09<2252:15:11,  3.33s/it]

test_rew_main =  0.00037517029265389496


 19%|█▉        | 566995/3000000 [9:10:42<12:02:55, 56.09it/s]  

ep_rew_main =  [0.00012978]


 19%|█▉        | 567000/3000000 [9:11:45<2210:01:13,  3.27s/it]

test_rew_main =  0.050047168972328734


 19%|█▉        | 569999/3000000 [9:14:17<11:13:18, 60.15it/s]  

ep_rew_main =  [0.1704392]


 19%|█▉        | 569999/3000000 [9:14:29<11:13:18, 60.15it/s]

test_rew_main =  0.0003715650204035853
current_time =  2022-02-03T07:20:42.730554


 19%|█▉        | 570000/3000000 [9:16:38<4884:37:46,  7.24s/it]

current_time =  2022-02-03T07:22:06.023304


 19%|█▉        | 572994/3000000 [9:19:14<11:58:09, 56.33it/s]  

ep_rew_main =  [3.59778e-05]


 19%|█▉        | 573000/3000000 [9:20:13<1842:41:56,  2.73s/it]

test_rew_main =  0.0007331769598277627


 19%|█▉        | 575993/3000000 [9:22:44<10:59:34, 61.25it/s]  

ep_rew_main =  [2.9592213e-06]


 19%|█▉        | 576000/3000000 [9:23:40<1463:40:37,  2.17s/it]

test_rew_main =  0.00701650771144194


 19%|█▉        | 578996/3000000 [9:26:16<10:54:41, 61.63it/s]  

ep_rew_main =  [3.758916e-08]


 19%|█▉        | 579000/3000000 [9:27:15<1759:17:20,  2.62s/it]

test_rew_main =  0.00015153519156062227


 19%|█▉        | 581999/3000000 [9:29:48<11:51:34, 56.63it/s]  

ep_rew_main =  [0.00010698]


 19%|█▉        | 582000/3000000 [9:30:43<2161:40:20,  3.22s/it]

test_rew_main =  0.00017696195760146427


 19%|█▉        | 584997/3000000 [9:33:17<10:52:24, 61.69it/s]  

ep_rew_main =  [0.00088159]


 20%|█▉        | 585000/3000000 [9:34:13<1430:03:43,  2.13s/it]

test_rew_main =  1.3450158542714918e-06


 20%|█▉        | 587995/3000000 [9:36:47<11:12:03, 59.82it/s]  

ep_rew_main =  [8.2525673e-07]


 20%|█▉        | 588000/3000000 [9:37:41<1721:20:46,  2.57s/it]

test_rew_main =  6.667947724500311e-05


 20%|█▉        | 590994/3000000 [9:40:18<12:54:59, 51.81it/s]  

ep_rew_main =  [1.2838246e-06]


 20%|█▉        | 591000/3000000 [9:41:13<1853:45:59,  2.77s/it]

test_rew_main =  0.0003934544677513126


 20%|█▉        | 593999/3000000 [9:43:45<10:21:03, 64.57it/s]  

ep_rew_main =  [0.01902102]


 20%|█▉        | 594000/3000000 [9:44:36<1598:35:03,  2.39s/it]

test_rew_main =  0.0008103062473601534


 20%|█▉        | 596998/3000000 [9:47:10<10:55:48, 61.07it/s]  

ep_rew_main =  [1.1061837e-08]


 20%|█▉        | 597000/3000000 [9:48:02<1528:58:16,  2.29s/it]

test_rew_main =  0.0005704941260665529


 20%|█▉        | 599998/3000000 [9:50:33<9:57:26, 66.95it/s]   

ep_rew_main =  [2.3760798e-07]


 20%|█▉        | 599998/3000000 [9:50:51<9:57:26, 66.95it/s]

test_rew_main =  0.00023597904233700091
current_time =  2022-02-03T07:56:57.001022


 20%|██        | 600000/3000000 [9:52:52<4077:48:10,  6.12s/it]

current_time =  2022-02-03T07:58:19.933256


 20%|██        | 602995/3000000 [9:55:25<12:51:51, 51.76it/s]  

ep_rew_main =  [0.00017598]


 20%|██        | 603000/3000000 [9:56:24<2067:45:54,  3.11s/it]

test_rew_main =  2.3624725664863688e-05


 20%|██        | 605993/3000000 [9:59:01<12:08:01, 54.81it/s]  

ep_rew_main =  [0.03858107]


 20%|██        | 606000/3000000 [9:59:55<1518:26:07,  2.28s/it]

test_rew_main =  0.0013503657280404535


 20%|██        | 608998/3000000 [10:02:36<10:25:19, 63.73it/s] 

ep_rew_main =  [2.2687796e-06]


 20%|██        | 609000/3000000 [10:03:33<1845:02:42,  2.78s/it]

test_rew_main =  0.0012457294331730006


 20%|██        | 611992/3000000 [10:06:13<10:46:54, 61.52it/s]  

ep_rew_main =  [0.03645477]


 20%|██        | 612000/3000000 [10:07:13<1480:50:19,  2.23s/it]

test_rew_main =  0.0003345365499219653


 20%|██        | 614999/3000000 [10:09:57<11:03:07, 59.94it/s]  

ep_rew_main =  [0.5002282]


 20%|██        | 615000/3000000 [10:10:54<2092:44:20,  3.16s/it]

test_rew_main =  0.004800059829668404


 21%|██        | 617995/3000000 [10:13:36<11:13:46, 58.92it/s]  

ep_rew_main =  [7.195538e-07]


 21%|██        | 618000/3000000 [10:14:35<1828:28:24,  2.76s/it]

test_rew_main =  7.997484492276683e-05


 21%|██        | 620998/3000000 [10:17:13<11:11:27, 59.05it/s]  

ep_rew_main =  [0.00446356]


 21%|██        | 621000/3000000 [10:18:11<2067:19:18,  3.13s/it]

test_rew_main =  0.0260345563684385


 21%|██        | 623994/3000000 [10:20:52<11:15:20, 58.64it/s]  

ep_rew_main =  [0.00299662]


 21%|██        | 624000/3000000 [10:21:53<1812:30:40,  2.75s/it]

test_rew_main =  0.003308425299199367


 21%|██        | 626996/3000000 [10:24:39<11:33:30, 57.03it/s]  

ep_rew_main =  [0.00069963]


 21%|██        | 627000/3000000 [10:25:43<2091:16:03,  3.17s/it]

test_rew_main =  0.00014519162433923773


 21%|██        | 629997/3000000 [10:28:29<12:01:16, 54.76it/s]  

ep_rew_main =  [1.5481464e-09]


 21%|██        | 629997/3000000 [10:28:45<12:01:16, 54.76it/s]

test_rew_main =  0.0031618976345198597
current_time =  2022-02-03T08:34:58.514966


 21%|██        | 630000/3000000 [10:30:51<4841:04:32,  7.35s/it]

current_time =  2022-02-03T08:36:18.396027


 21%|██        | 632995/3000000 [10:33:36<9:38:13, 68.23it/s]   

ep_rew_main =  [1.02672175e-05]


 21%|██        | 633000/3000000 [10:34:36<1593:22:21,  2.42s/it]

test_rew_main =  0.03430558502722235


 21%|██        | 635999/3000000 [10:37:19<11:12:11, 58.61it/s]  

ep_rew_main =  [4.778217e-09]


 21%|██        | 636000/3000000 [10:38:15<2136:21:39,  3.25s/it]

test_rew_main =  0.0017406376070601462


 21%|██▏       | 638998/3000000 [10:41:02<10:13:42, 64.12it/s]  

ep_rew_main =  [0.01522066]


 21%|██▏       | 639000/3000000 [10:42:04<1964:37:51,  3.00s/it]

test_rew_main =  0.08610055483830312


 21%|██▏       | 641992/3000000 [10:44:49<10:20:54, 63.29it/s]  

ep_rew_main =  [0.00271253]


 21%|██▏       | 642000/3000000 [10:45:48<1466:39:32,  2.24s/it]

test_rew_main =  0.02521592011704758


 21%|██▏       | 644995/3000000 [10:48:33<11:57:21, 54.71it/s]  

ep_rew_main =  [0.0033004]


 22%|██▏       | 645000/3000000 [10:49:39<1929:56:52,  2.95s/it]

test_rew_main =  0.01128096649099753


 22%|██▏       | 647993/3000000 [10:52:25<10:35:38, 61.67it/s]  

ep_rew_main =  [0.2264398]


 22%|██▏       | 648000/3000000 [10:53:31<1715:08:18,  2.63s/it]

test_rew_main =  0.03271455175431342


 22%|██▏       | 650992/3000000 [10:56:18<10:30:53, 62.06it/s]  

ep_rew_main =  [0.00101111]


 22%|██▏       | 651000/3000000 [10:57:21<1322:03:49,  2.03s/it]

test_rew_main =  0.13479143919536554


 22%|██▏       | 653994/3000000 [11:00:07<11:29:37, 56.70it/s]  

ep_rew_main =  [2.1762378e-06]


 22%|██▏       | 654000/3000000 [11:01:03<1643:22:49,  2.52s/it]

test_rew_main =  0.14239120888009277


 22%|██▏       | 656996/3000000 [11:03:50<10:24:38, 62.52it/s]  

ep_rew_main =  [0.09792624]


 22%|██▏       | 657000/3000000 [11:04:53<1970:48:40,  3.03s/it]

test_rew_main =  0.21230717791549975


 22%|██▏       | 659993/3000000 [11:07:41<10:48:04, 60.18it/s]  

ep_rew_main =  [0.12065873]


 22%|██▏       | 659993/3000000 [11:07:57<10:48:04, 60.18it/s]

test_rew_main =  0.0031737363195170907
current_time =  2022-02-03T09:14:01.667446


 22%|██▏       | 660000/3000000 [11:09:56<3773:40:59,  5.81s/it]

current_time =  2022-02-03T09:15:23.984406


 22%|██▏       | 662995/3000000 [11:12:41<10:11:22, 63.71it/s]  

ep_rew_main =  [0.00774022]


 22%|██▏       | 663000/3000000 [11:13:41<1615:52:32,  2.49s/it]

test_rew_main =  0.013828164686956707


 22%|██▏       | 665993/3000000 [11:16:30<10:51:03, 59.75it/s]  

ep_rew_main =  [1.0934287e-08]


 22%|██▏       | 666000/3000000 [11:17:30<1611:28:16,  2.49s/it]

test_rew_main =  0.004348561414352804


 22%|██▏       | 668996/3000000 [11:20:21<12:32:58, 51.60it/s]  

ep_rew_main =  [0.00376254]


 22%|██▏       | 669000/3000000 [11:21:18<2057:47:34,  3.18s/it]

test_rew_main =  0.03230594139868982


 22%|██▏       | 671999/3000000 [11:24:07<12:07:44, 53.32it/s]  

ep_rew_main =  [0.01174405]


 22%|██▏       | 672000/3000000 [11:25:02<2363:37:18,  3.66s/it]

test_rew_main =  0.020437030724595198


 22%|██▏       | 674994/3000000 [11:27:57<10:29:22, 61.57it/s]  

ep_rew_main =  [0.00051052]


 22%|██▎       | 675000/3000000 [11:28:52<1323:49:01,  2.05s/it]

test_rew_main =  0.0035147478586485294


 23%|██▎       | 677995/3000000 [11:31:45<10:41:53, 60.29it/s]  

ep_rew_main =  [0.01061639]


 23%|██▎       | 678000/3000000 [11:32:39<1659:30:22,  2.57s/it]

test_rew_main =  0.0891243038150789


 23%|██▎       | 680993/3000000 [11:35:34<10:17:01, 62.64it/s]  

ep_rew_main =  [0.0138845]


 23%|██▎       | 681000/3000000 [11:36:32<1360:46:36,  2.11s/it]

test_rew_main =  0.05368117035620682


 23%|██▎       | 683993/3000000 [11:39:23<10:17:50, 62.48it/s]  

ep_rew_main =  [0.04815313]


 23%|██▎       | 684000/3000000 [11:40:17<1375:02:56,  2.14s/it]

test_rew_main =  0.16633035053819958


 23%|██▎       | 686996/3000000 [11:43:08<11:12:11, 57.35it/s]  

ep_rew_main =  [0.00021342]


 23%|██▎       | 687000/3000000 [11:44:01<1683:18:23,  2.62s/it]

test_rew_main =  0.08687903664382035


 23%|██▎       | 689995/3000000 [11:46:52<12:32:29, 51.16it/s]  

ep_rew_main =  [9.886838e-06]


 23%|██▎       | 689995/3000000 [11:47:09<12:32:29, 51.16it/s]

test_rew_main =  0.11464753190032059
current_time =  2022-02-03T09:53:14.378998


 23%|██▎       | 690000/3000000 [11:49:12<4669:52:41,  7.28s/it]

current_time =  2022-02-03T09:54:40.140498


 23%|██▎       | 692999/3000000 [11:52:07<12:52:45, 49.76it/s]  

ep_rew_main =  [0.01494341]


 23%|██▎       | 693000/3000000 [11:53:05<2507:08:24,  3.91s/it]

test_rew_main =  0.020590432247902562


 23%|██▎       | 695991/3000000 [11:56:01<10:13:50, 62.56it/s]  

ep_rew_main =  [0.00016865]


 23%|██▎       | 696000/3000000 [11:56:59<1265:56:57,  1.98s/it]

test_rew_main =  0.0003568752734358344


 23%|██▎       | 698992/3000000 [11:59:54<11:03:39, 57.79it/s]  

ep_rew_main =  [0.01122397]


 23%|██▎       | 699000/3000000 [12:00:51<1395:20:01,  2.18s/it]

test_rew_main =  0.022459228934835918


 23%|██▎       | 701995/3000000 [12:03:49<11:58:49, 53.28it/s]  

ep_rew_main =  [0.01997353]


 23%|██▎       | 702000/3000000 [12:04:48<1985:21:00,  3.11s/it]

test_rew_main =  0.1506534238186657


 23%|██▎       | 704998/3000000 [12:07:43<10:39:48, 59.78it/s]  

ep_rew_main =  [0.01772146]


 24%|██▎       | 705000/3000000 [12:08:42<1852:18:21,  2.91s/it]

test_rew_main =  0.0022263164360756317


 24%|██▎       | 707997/3000000 [12:11:35<10:06:11, 63.02it/s]  

ep_rew_main =  [0.00366556]


 24%|██▎       | 708000/3000000 [12:12:34<1759:59:52,  2.76s/it]

test_rew_main =  0.004134392933542464


 24%|██▎       | 710997/3000000 [12:15:26<10:17:33, 61.78it/s]  

ep_rew_main =  [7.7638376e-08]


 24%|██▎       | 711000/3000000 [12:16:23<1589:26:31,  2.50s/it]

test_rew_main =  0.0263752642071229


 24%|██▍       | 713999/3000000 [12:19:23<12:16:25, 51.74it/s]  

ep_rew_main =  [3.0594474e-05]


 24%|██▍       | 714000/3000000 [12:20:29<2829:28:11,  4.46s/it]

test_rew_main =  0.03798529145002311


 24%|██▍       | 716994/3000000 [12:23:42<11:58:00, 52.99it/s]  

ep_rew_main =  [0.00414786]


 24%|██▍       | 717000/3000000 [12:24:42<1851:45:44,  2.92s/it]

test_rew_main =  0.07464083190929514


 24%|██▍       | 719995/3000000 [12:27:43<10:52:47, 58.21it/s]  

ep_rew_main =  [0.06806983]


 24%|██▍       | 719995/3000000 [12:28:02<10:52:47, 58.21it/s]

test_rew_main =  0.003843963348338823
current_time =  2022-02-03T10:34:11.548362


 24%|██▍       | 720000/3000000 [12:30:09<4017:04:14,  6.34s/it]

current_time =  2022-02-03T10:35:36.229603


 24%|██▍       | 722993/3000000 [12:33:16<10:55:34, 57.89it/s]  

ep_rew_main =  [0.00136915]


 24%|██▍       | 723000/3000000 [12:34:21<1770:31:15,  2.80s/it]

test_rew_main =  0.017593571991926453


 24%|██▍       | 725993/3000000 [12:37:38<10:20:08, 61.11it/s]  

ep_rew_main =  [8.698455e-09]


 24%|██▍       | 726000/3000000 [12:38:38<1479:36:21,  2.34s/it]

test_rew_main =  0.028822815639439597


 24%|██▍       | 728994/3000000 [12:41:48<10:51:37, 58.09it/s]  

ep_rew_main =  [0.00384002]


 24%|██▍       | 729000/3000000 [12:42:49<1732:31:58,  2.75s/it]

test_rew_main =  0.12944209327335318


 24%|██▍       | 731997/3000000 [12:45:51<11:42:21, 53.82it/s]  

ep_rew_main =  [0.00025721]


 24%|██▍       | 732000/3000000 [12:46:50<1741:48:47,  2.76s/it]

test_rew_main =  0.028915067557867775


 24%|██▍       | 734998/3000000 [12:49:53<10:06:39, 62.23it/s]  

ep_rew_main =  [0.00401054]


 24%|██▍       | 735000/3000000 [12:50:51<1438:54:35,  2.29s/it]

test_rew_main =  0.11434286224491401


 25%|██▍       | 737992/3000000 [12:53:58<10:07:00, 62.11it/s]  

ep_rew_main =  [0.00373366]


 25%|██▍       | 738000/3000000 [12:54:57<1291:15:36,  2.06s/it]

test_rew_main =  0.05799100097567631


 25%|██▍       | 740994/3000000 [12:58:04<11:36:22, 54.07it/s]  

ep_rew_main =  [0.07579505]


 25%|██▍       | 741000/3000000 [12:59:02<1828:11:54,  2.91s/it]

test_rew_main =  0.17228156909457487


 25%|██▍       | 743993/3000000 [13:02:05<10:43:10, 58.46it/s]  

ep_rew_main =  [0.0065145]


 25%|██▍       | 744000/3000000 [13:03:05<1478:56:51,  2.36s/it]

test_rew_main =  0.15758832311442142


 25%|██▍       | 746998/3000000 [13:06:13<10:32:26, 59.37it/s]  

ep_rew_main =  [0.00055752]


 25%|██▍       | 747000/3000000 [13:07:14<1847:25:44,  2.95s/it]

test_rew_main =  0.06680630971956096


 25%|██▍       | 749995/3000000 [13:10:13<10:20:10, 60.47it/s]  

ep_rew_main =  [0.17406921]


 25%|██▍       | 749995/3000000 [13:10:24<10:20:10, 60.47it/s]

test_rew_main =  0.0015000189552952397
current_time =  2022-02-03T11:16:33.802049


 25%|██▌       | 750000/3000000 [13:12:22<3443:27:07,  5.51s/it]

current_time =  2022-02-03T11:17:49.284528


 25%|██▌       | 752995/3000000 [13:15:29<10:55:51, 57.10it/s]  

ep_rew_main =  [0.27318865]


 25%|██▌       | 753000/3000000 [13:16:37<1982:01:49,  3.18s/it]

test_rew_main =  0.06940712630137638


 25%|██▌       | 755999/3000000 [13:19:57<12:07:56, 51.38it/s]  

ep_rew_main =  [0.00108269]


 25%|██▌       | 756000/3000000 [13:21:00<2600:01:25,  4.17s/it]

test_rew_main =  0.07413916377932482


 25%|██▌       | 758991/3000000 [13:24:18<10:31:01, 59.19it/s]  

ep_rew_main =  [0.00016529]


 25%|██▌       | 759000/3000000 [13:25:22<1340:32:48,  2.15s/it]

test_rew_main =  0.018495806180102346


 25%|██▌       | 761993/3000000 [13:28:44<11:30:04, 54.05it/s]  

ep_rew_main =  [0.00175179]


 25%|██▌       | 762000/3000000 [13:29:48<1853:59:25,  2.98s/it]

test_rew_main =  0.026333162350782452


 25%|██▌       | 764996/3000000 [13:33:12<11:09:05, 55.67it/s]  

ep_rew_main =  [0.24787058]


 26%|██▌       | 765000/3000000 [13:34:21<2133:38:08,  3.44s/it]

test_rew_main =  0.058357866747095755


 26%|██▌       | 767997/3000000 [13:37:38<11:18:42, 54.81it/s]  

ep_rew_main =  [6.0382223e-07]


 26%|██▌       | 768000/3000000 [13:38:43<2077:30:13,  3.35s/it]

test_rew_main =  0.1628027056855287


 26%|██▌       | 770994/3000000 [13:42:02<10:36:53, 58.33it/s]  

ep_rew_main =  [0.14574175]


 26%|██▌       | 771000/3000000 [13:43:09<1859:20:54,  3.00s/it]

test_rew_main =  0.0019241366887380019


 26%|██▌       | 773999/3000000 [13:46:30<12:12:05, 50.68it/s]  

ep_rew_main =  [6.7621313e-06]


 26%|██▌       | 774000/3000000 [13:47:36<2733:48:24,  4.42s/it]

test_rew_main =  0.08602012152826831


 26%|██▌       | 776966/3000000 [13:50:57<39:37:32, 15.58it/s]  


KeyboardInterrupt: 

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.


All 1 operations synced, thanks for waiting!


In [None]:
# List every entry of the ac.q and ac.pi state_dicts as "<name> \t <shape>",
# each network under its own header line.
for net_banner, model in (
    ("Model_q's state_dict:", ac.q),
    ("Model_pi's state_dict:", ac.pi),
):
    print(net_banner)
    # Take the state_dict once per network and walk its (name, tensor) pairs.
    for param_tensor, param_value in model.state_dict().items():
        print(param_tensor, "\t", param_value.size())

Model_q's state_dict:
q.0.weight 	 torch.Size([256, 41])
q.0.bias 	 torch.Size([256])
q.2.weight 	 torch.Size([256, 256])
q.2.bias 	 torch.Size([256])
q.4.weight 	 torch.Size([256, 256])
q.4.bias 	 torch.Size([256])
q.6.weight 	 torch.Size([1, 256])
q.6.bias 	 torch.Size([1])
Model_pi's state_dict:
pi.0.weight 	 torch.Size([256, 35])
pi.0.bias 	 torch.Size([256])
pi.2.weight 	 torch.Size([256, 256])
pi.2.bias 	 torch.Size([256])
pi.4.weight 	 torch.Size([256, 256])
pi.4.bias 	 torch.Size([256])
pi.6.weight 	 torch.Size([6, 256])
pi.6.bias 	 torch.Size([6])


In [None]:
# Print each top-level entry of the pi and q optimizer state_dicts as
# "<key> \t <value>", one optimizer after the other.
# NOTE: the values are printed in full, so the output includes whole tensors.
for opt_banner, opt in (
    ("pi_optimizer's state_dict:", pi_optimizer),
    ("q_optimizer's state_dict:", q_optimizer),
):
    print(opt_banner)
    # Build the state_dict once per optimizer and walk its (key, value) pairs.
    for var_name, opt_entry in opt.state_dict().items():
        print(var_name, "\t", opt_entry)



pi_optimizer's state_dict:
state 	 {0: {'step': 295000, 'square_avg': tensor([[1.5294e-07, 4.3574e-07, 1.7078e-07,  ..., 4.6295e-08, 1.3153e-07,
         3.2821e-08],
        [1.1260e-06, 1.0042e-06, 9.5667e-07,  ..., 3.2371e-07, 2.5242e-06,
         2.0768e-07],
        [3.3766e-07, 1.8660e-07, 2.4575e-07,  ..., 6.6838e-08, 3.6149e-07,
         3.6968e-08],
        ...,
        [3.4772e-07, 6.0496e-07, 3.3804e-07,  ..., 8.6878e-08, 3.6086e-07,
         6.7247e-08],
        [1.1408e-06, 7.1713e-07, 6.1176e-07,  ..., 3.4519e-07, 1.2215e-06,
         1.1788e-07],
        [2.8419e-07, 7.1120e-07, 3.0715e-07,  ..., 6.3061e-08, 4.3581e-07,
         7.5016e-08]], device='cuda:0')}, 1: {'step': 295000, 'square_avg': tensor([5.5312e-07, 3.3224e-06, 6.3727e-07, 5.1333e-07, 8.2843e-07, 6.8615e-07,
        8.8244e-07, 1.9534e-06, 6.2446e-07, 1.3387e-06, 4.0938e-07, 1.2642e-06,
        6.8603e-07, 9.9440e-07, 1.2369e-06, 7.1836e-07, 1.1587e-06, 1.1835e-06,
        9.5166e-07, 5.0816e-07, 1.7202e-0

In [None]:
import os

# Save a checkpoint holding the weights of both networks (ac.q, ac.pi) and
# the state of both optimizers, in a file named after the current time.
now = datetime.now()

# isoformat() already returns a str; it is embedded verbatim in the filename.
# NOTE(review): the timestamp contains ':' characters, which some filesystems
# (e.g. Windows) reject in filenames - confirm if the notebook must run there.
current_time = now.isoformat()

# torch.save does not create missing parent directories, so make sure the
# target folder exists instead of failing after a long training run.
os.makedirs("model_nn", exist_ok=True)

torch.save(
    {
        'model of ac.q': ac.q.state_dict(),
        'model of ac.pi': ac.pi.state_dict(),
        'q_optimizer_state_dict': q_optimizer.state_dict(),
        'pi_optimizer_state_dict': pi_optimizer.state_dict(),
    },
    "model_nn/model_nn_%s.pt" % current_time,
)



In [None]:
# Stop the Neptune run held in nep_log now that logging is finished.
nep_log.stop()

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 140 operations to synchronize with Neptune. Do not kill this process.


All 140 operations synced, thanks for waiting!
