In [1]:
import scipy.signal
import sys
import torch
import torch.nn as nn
import numpy as np

In [2]:
from typing import Dict, List, Optional, Tuple
import gym
from PIL import Image
# from pyvirtualdisplay import Display
# Display().start()
from datetime import datetime
from tqdm import tqdm

In [3]:
import math
import random
from copy import deepcopy
import torch
from torch.optim import Adam
from torch.optim import RMSprop
import gym
import time
from collections import namedtuple, deque
import neptune.new as neptune

In [4]:
import robosuite as suite
from robosuite.controllers import load_controller_config
from robosuite.controllers.controller_factory import reset_controllers
from robosuite.utils import observables
from robosuite.utils.input_utils import *
from robosuite.robots import Bimanual
import imageio
import numpy as np
import robosuite.utils.macros as macros
macros.IMAGE_CONVENTION = "opencv"

In [5]:
nep_log = neptune.init(
    project="xhnfirst/DDPG-robosuite",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI1NTg5MDI2OS01MTVmLTQ2YjUtODA1Yy02ZWQyNDgxZDcwN2UifQ==",
)

https://app.neptune.ai/xhnfirst/DDPG-robosuite/e/DDPGROB-227
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [6]:
options = {
    'env_name': 'EElab_test5',
    "robots": "UR5e"
}
controller_name = "JOINT_VELOCITY"
options["controller_configs"] = suite.load_controller_config(default_controller=controller_name)

env = suite.make(
    **options,
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer = 'mujoco',

)

test_env = suite.make(
    **options,
    has_renderer=False,
    has_offscreen_renderer=False,
    ignore_done=True,
    use_camera_obs=False,
    gripper_types=None,
    renderer = 'mujoco',
)


video_env = suite.make(
    **options,
    gripper_types=None,
    has_renderer=False,
    has_offscreen_renderer=True,
    ignore_done=True,
    use_camera_obs=True,
    use_object_obs=True, 
    camera_names='Labviewer',
    camera_heights=512,
    camera_widths=512,
    # control_freq=200,
    renderer = 'mujoco',
)

frame = []
device= torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('device = ', device)

device =  cuda


In [7]:
def mlp(sizes, activation, output_activation=nn.Identity):
    layers = []
    for j in range(len(sizes)-1):
        act = activation if j < len(sizes)-2 else output_activation
        layers += [nn.Linear(sizes[j], sizes[j+1]), act()]
    return nn.Sequential(*layers)


class MLPActor(nn.Module):

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation, act_limit):
        super().__init__()
        pi_sizes = [obs_dim] + list(hidden_sizes) + [act_dim]
        self.pi = mlp(pi_sizes, activation, nn.Tanh)
        self.act_limit = act_limit

    def forward(self, obs):
        # Return output from network scaled to action space limits.
        return self.act_limit * self.pi(obs)

class MLPQFunction(nn.Module):

    def __init__(self, obs_dim, act_dim, hidden_sizes, activation):
        super().__init__()
        self.q = mlp([obs_dim + act_dim] + list(hidden_sizes) + [1], activation)

    def forward(self, obs, act):
        q = self.q(torch.cat([obs, act], dim=-1))
        return torch.squeeze(q, -1) # Critical to ensure q has right shape.

class MLPActorCritic(nn.Module):

    def __init__(self, hidden_sizes=(256,256),
                 activation=nn.ReLU, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
        super().__init__()

        obs_dim = 35
        act_dim = 6
        act_limit = 1

        # build policy and value functions
        self.pi = MLPActor(obs_dim, act_dim, hidden_sizes, activation, act_limit).to(device)
        self.q = MLPQFunction(obs_dim, act_dim, hidden_sizes, activation).to(device)

    def act(self, obs):
        with torch.no_grad():
            return self.pi(obs)

In [8]:
Transition = namedtuple('Transition',
                        ('obs', 'act', 'rew', 'next_obs', 'done'))

class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([],maxlen=capacity)

    def push(self, *args):
        """Save a transition"""
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [9]:

params = {
    "dropout": 0.2,
    "learning_rate": 0.001,
    "optimizer": "Adam",
    "hid": 256,
    "l": 3,
    "seed": 0,
    "steps_per_epoch": 3000,
    "steps_video": 30000,
    "epochs": 1000,
    "replay_size": int(1e8),
    "gamma": 0.99,
    "polyak": 0.995,
    "pi_lr": 1e-4,
    "q_lr": 1e-4,
    "batch_size": 1000,
    "start_steps": 10000, 
    "update_after": 5000,
    "update_every": 100,
    "act_noise": 0.01,
    "num_test_episodes": 5,
    "max_ep_len": 300,
    "max_video_len": 300,
    "save_model_len": 10000,
    # "obs_dim": 47,
    # "act_dim": 7,
    # "act_limit": 1
}

ac_kwargs=dict(hidden_sizes=[params["hid"]]*params["l"])

In [10]:
nep_log["parameters"] = params

torch.manual_seed(params["seed"])
np.random.seed(params["seed"])

obs_dim = 35
print('obs_dim = ', obs_dim)
act_dim = 6
print('act_dim = ', act_dim)
# Action limit for clamping: critically, assumes all dimensions share the same bound!
act_limit = 1
print('act_limit = ', act_limit)
# Create actor-critic module and target networks
ac = MLPActorCritic(**ac_kwargs)
ac_targ = deepcopy(ac)

# Freeze target networks with respect to optimizers (only update via polyak averaging)
for p in ac_targ.parameters():
    p.requires_grad = False

memory = ReplayMemory(params["replay_size"])

obs_dim =  35
act_dim =  6
act_limit =  1


In [11]:
# Set up function for computing DDPG Q-loss
def compute_loss_q(data):

    o = torch.cat(data.obs).float()
    a = torch.cat(data.act).float()
    r = torch.cat(data.rew).float()
    o2 =torch.cat(data.next_obs).float()
    d = torch.cat(data.done).float()

    q = ac.q(o,a)


    # Bellman backup for Q function
    with torch.no_grad():
        q_pi_targ = ac_targ.q(o2, ac_targ.pi(o2))
        backup = r + params["gamma"] * (1 - d) * q_pi_targ

    # MSE loss against Bellman backup
    loss_q = ((q - backup)**2).mean()

    return loss_q

# Set up function for computing DDPG pi loss
def compute_loss_pi(data):

    o = torch.cat(data.obs).float()

    q_pi = ac.q(o, ac.pi(o))

    return -q_pi.mean()


In [12]:
pi_optimizer = RMSprop(ac.pi.parameters(), lr=params["pi_lr"])
q_optimizer = RMSprop(ac.q.parameters(), lr=params["q_lr"])

def update(data):
    # First run one gradient descent step for Q.


    q_optimizer.zero_grad()
    loss_q = compute_loss_q(data)

    loss_q.backward()

    q_optimizer.step()


    # Freeze Q-network so you don't waste computational effort 
    # computing gradients for it during the policy learning step.
    for p in ac.q.parameters():
        p.requires_grad = False

    # Next run one gradient descent step for pi.
    pi_optimizer.zero_grad()
    loss_pi = compute_loss_pi(data)
    loss_pi.backward()
    pi_optimizer.step()

    # Unfreeze Q-network so you can optimize it at next DDPG step.
    for p in ac.q.parameters():
        p.requires_grad = True



    # Finally, update target networks by polyak averaging.
    with torch.no_grad():
        for p, p_targ in zip(ac.parameters(), ac_targ.parameters()):
            # NB: We use an in-place operations "mul_", "add_" to update target
            # params, as opposed to "mul" and "add", which would make new tensors.
            p_targ.data.mul_(params["polyak"])
            p_targ.data.add_((1 - params["polyak"]) * p.data)


In [13]:



def get_action(o, noise_scale):
    a = ac.act(torch.as_tensor(o, dtype=torch.float32))
    a += noise_scale * torch.randn(act_dim).to(device)
    return torch.clip(a, -act_limit, act_limit)

def test_agent(epoch):
    test_main = 0
    test_step = 0
    for j in range(params["num_test_episodes"]):
        obs, d, test_ep_ret, test_ep_len = test_env.reset(), False, 0, 0
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], dtype=torch.float32, device=device)
        while not(d or (test_ep_len == params["max_ep_len"])):
            a_cpu = get_action(o, 0).cpu().data.numpy()
            obs, r, d, _ = test_env.step(a_cpu[0])
            o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
            o = torch.tensor([o], dtype=torch.float32, device=device)
            test_ep_ret += r
            test_ep_len += 1
        test_ep_main = test_ep_ret/test_ep_len
        test_step +=1
        test_main += test_ep_main
    print('test_rew_main = ', float(test_main/test_step))
    nep_log["test/reward"].log(test_main/test_step)
    
def video_agent(epoch):
    obs, d, test_ep_len = video_env.reset(), False, 0
    o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
    o = torch.tensor([o], dtype=torch.float32, device=device)
    now = datetime.now()
    current_time = str(now.isoformat())
    writer = imageio.get_writer(
        "/home/xhnfly/Cosmic_rays_X/X_Robot/robosuite/robosuite/demos/video/DDPG_re_touch_2_15_300/DDPG_UR5_%s_ep_%d.mp4" % (current_time, epoch), fps=100)
    frame = obs["Labviewer_image"]
    writer.append_data(frame)

    while not(d or (test_ep_len == params["max_video_len"])):
        a_cpu = get_action(o, 0).cpu().data.numpy()
        obs, _, d, _ = video_env.step(a_cpu[0])
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], dtype=torch.float32, device=device)
        frame = obs["Labviewer_image"]
        writer.append_data(frame)
        test_ep_len += 1
    writer.close()
    nep_log['video'] = neptune.types.File('/home/xhnfly/Cosmic_rays_X/X_Robot/robosuite/robosuite/demos/video/DDPG_re_touch_2_15_300/DDPG_UR5_%s_ep_%d.mp4' % (current_time, epoch))





In [14]:
# obs = {
#     'robot0_joint_pos_cos': None,
#     'robot0_joint_pos_sin': None,
#     'robot0_joint_vel': None,
#     'robot0_eef_pos': None,
#     'robot0_eef_quat': None,
#     'robot0_gripper_qpos': None,
#     'robot0_gripper_qvel': None,
#     'cubeA_pos': None,
#     'cubeA_quat': None,
#     'cubeB_pos': None,
#     'cubeB_quat': None,
#     'gripper_to_cubeA': None,
#     'gripper_to_cubeB': None,
#     'cubeA_to_cubeB': None,
# }

obs, ep_ret, ep_len = env.reset(), 0, 0

o = list(obs['robot0_proprio-state']) + list(obs['object-state'])

# env.viewer.set_camera(camera_id=0)


# Define neutral value
neutral = np.zeros(7)

# Keep track of done variable to know when to break loop

# Prepare for interaction with environment
total_steps = params["steps_per_epoch"] * params["epochs"]
start_time = time.time()

o = torch.tensor([o], device=device)


start_time_rec = datetime.now()
r_true = 0
total_main = 0
ep_rew_main = 0
reward_dict={}

In [15]:
# Main loop: collect experience in env and update/log each epoch
low, high = env.action_spec

for t in tqdm(range(total_steps)):
    
    # Until start_steps have elapsed, randomly sample actions
    # from a uniform distribution for better exploration. Afterwards, 
    # use the learned policy (with some noise, via act_noise). 
    if t > params["start_steps"]:
        a = get_action(o, params["act_noise"])      # Tensor
    else:
        a = torch.tensor([np.random.uniform(low, high)], dtype=torch.float32, device=device)
        
    a_cpu = a.cpu().data.numpy()
    # Step the env
    obs2, r, d, _ = env.step(a_cpu[0])
    
    o2 = list(obs2['robot0_proprio-state']) + list(obs2['object-state'])
    # print('len(o2) = ', len(o2))

    ep_len += 1
    total_main += r


    # Ignore the "done" signal if it comes from hitting the time
    # horizon (that is, when it's an artificial terminal signal
    # that isn't based on the agent's state)
    d = False if ep_len==params["max_ep_len"] else d

    o2 = torch.tensor([o2], dtype=torch.float32, device=device)
    r = torch.tensor([r], dtype=torch.float32, device=device)
    d = torch.tensor([d], dtype=torch.float32, device=device)

    # Store experience to replay buffer
    memory.push(o, a, r, o2, d)
    nep_log["train/o"].log(o)
    nep_log["train/a"].log(a)
    nep_log["train/r"].log(r)
    nep_log["train/o2"].log(o2)
    nep_log["train/d"].log(d)

    # Super critical, easy to overlook step: make sure to update 
    # most recent observation!
    o=o2
    ep_ret += r
    
    
    # End of trajectory handling
    if d or (ep_len == params["max_ep_len"]):
        ep_rew = ep_ret/ep_len
        ep_rew_main += ep_rew
        obs, ep_ret, ep_len = env.reset(), 0, 0
        o = list(obs['robot0_proprio-state']) + list(obs['object-state'])
        o = torch.tensor([o], device=device)


    # Update handling
    if t >= params["update_after"] and t % params["update_every"] == 0:
        for i in range(params["update_every"]):

            transitions = memory.sample(params["batch_size"])
            # Transpose the batch (see https://stackoverflow.com/a/19343/3343043 for
            # detailed explanation). This converts batch-array of Transitions
            # to Transition of batch-arrays.
            batch = Transition(*zip(*transitions))
            update(data=batch)

    # End of epoch handling
    if (t+1) % params["steps_per_epoch"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        train_reward = ep_rew_main/epoch
        nep_log["train/reward"].log(train_reward)
        print('ep_rew_main = ', train_reward.cpu().data.numpy())
        ep_rew_main = 0
        # Test the performance of the deterministic version of the agent.
        test_agent(epoch)
        

    if (t+1) % params["steps_video"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)
        video_agent(epoch)
        now = datetime.now()
        current_time = str(now.isoformat())
        print('current_time = ', current_time)

    if (t+1) % params["save_model_len"] == 0:
        epoch = (t+1) // params["steps_per_epoch"]
        now = datetime.now()
        current_time = str(now.isoformat())
        torch.save({
                    'model of ac.q': ac.q.state_dict(),
                    'model of ac.pi': ac.pi.state_dict(),
                    'q_optimizer_state_dict': q_optimizer.state_dict(),
                    'pi_optimizer_state_dict': pi_optimizer.state_dict(),
                    
                    }, "model_nn/touch/model_nn_%s%d.pt" % (current_time, epoch))


        

  a = torch.tensor([np.random.uniform(low, high)], dtype=torch.float32, device=device)
  0%|          | 2980/3000000 [00:21<4:02:37, 205.87it/s]

ep_rew_main =  [1.3976852e-05]


  0%|          | 3021/3000000 [00:29<67:50:31, 12.27it/s]

test_rew_main =  7.0645495580884026e-06


  0%|          | 5996/3000000 [01:09<8:59:52, 92.43it/s] 

ep_rew_main =  [3.5185636e-05]
test_rew_main =  1.4167459784151237e-06


  0%|          | 8987/3000000 [02:17<10:09:46, 81.75it/s] 

ep_rew_main =  [-0.00337843]
test_rew_main =  -9.53628312867276


  0%|          | 11978/3000000 [03:25<10:37:52, 78.07it/s]

ep_rew_main =  [-1.3434132]


  0%|          | 12000/3000000 [03:32<80:47:24, 10.27it/s]

test_rew_main =  -0.386331659521489


  0%|          | 14987/3000000 [04:34<9:06:04, 91.11it/s] 

ep_rew_main =  [-1.1385311]
test_rew_main =  6.0438486891758166e-05


  1%|          | 17998/3000000 [05:42<8:31:53, 97.09it/s]  

ep_rew_main =  [-0.7500495]
test_rew_main =  2.0749157663966234e-05


  1%|          | 20994/3000000 [06:52<8:57:34, 92.36it/s]  

ep_rew_main =  [-0.08815525]
test_rew_main =  1.3582457716414085e-05


  1%|          | 23993/3000000 [08:00<8:54:45, 92.75it/s] 

ep_rew_main =  [-0.3352255]
test_rew_main =  -0.4166418394336765


  1%|          | 26989/3000000 [09:10<8:52:03, 93.13it/s]  

ep_rew_main =  [6.102222e-06]
test_rew_main =  -0.6271013851501517


  1%|          | 29990/3000000 [10:20<8:53:45, 92.74it/s]  

ep_rew_main =  [-0.03226674]
test_rew_main =  -0.12281559930030961
current_time =  2022-02-15T17:58:01.253060


  1%|          | 30000/3000000 [10:46<354:12:02,  2.33it/s]

current_time =  2022-02-15T17:58:20.009771


  1%|          | 32987/3000000 [11:49<9:23:01, 87.83it/s]  

ep_rew_main =  [0.00018851]
test_rew_main =  -0.03307451106111981


  1%|          | 35988/3000000 [12:59<9:07:45, 90.18it/s]  

ep_rew_main =  [0.00256262]
test_rew_main =  2.3538874632066015e-06


  1%|▏         | 38993/3000000 [14:09<8:41:44, 94.59it/s]  

ep_rew_main =  [-0.00833791]
test_rew_main =  -0.043380203268755316


  1%|▏         | 41991/3000000 [15:20<10:02:06, 81.88it/s]

ep_rew_main =  [-0.03096704]
test_rew_main =  -0.5174316641982875


  1%|▏         | 44994/3000000 [16:31<8:46:35, 93.53it/s] 

ep_rew_main =  [-0.01208877]
test_rew_main =  -0.41047569339023315


  2%|▏         | 47984/3000000 [17:45<11:14:15, 72.97it/s]

ep_rew_main =  [-0.00242314]
test_rew_main =  6.775927614788143e-06


  2%|▏         | 50981/3000000 [19:19<12:06:37, 67.64it/s] 

ep_rew_main =  [4.1050293e-06]


  2%|▏         | 51000/3000000 [19:26<94:54:22,  8.63it/s]

test_rew_main =  -0.5737885434309422


  2%|▏         | 53997/3000000 [20:32<9:32:39, 85.74it/s] 

ep_rew_main =  [1.4579624e-05]
test_rew_main =  1.2047641831551083e-06


  2%|▏         | 56996/3000000 [21:46<8:58:15, 91.13it/s]  

ep_rew_main =  [1.0343898e-05]
test_rew_main =  -0.2825054948516691


  2%|▏         | 59994/3000000 [23:00<9:07:17, 89.53it/s]  

ep_rew_main =  [5.1294746e-06]
test_rew_main =  -0.0011482394343309203
current_time =  2022-02-15T18:10:40.644691


  2%|▏         | 60000/3000000 [23:25<378:07:36,  2.16it/s]

current_time =  2022-02-15T18:10:59.594641


  2%|▏         | 62977/3000000 [24:31<10:47:51, 75.56it/s] 

ep_rew_main =  [4.1323974e-06]


  2%|▏         | 63000/3000000 [24:38<78:16:10, 10.42it/s]

test_rew_main =  -0.06464513502448893


  2%|▏         | 65997/3000000 [25:45<9:02:57, 90.06it/s] 

ep_rew_main =  [2.4105368e-06]
test_rew_main =  2.6616748371899315e-07


  2%|▏         | 68982/3000000 [26:57<10:42:52, 75.99it/s]

ep_rew_main =  [2.9427497e-06]
test_rew_main =  -0.13692997875472607


  2%|▏         | 71984/3000000 [28:11<10:26:46, 77.86it/s] 

ep_rew_main =  [2.3564526e-05]
test_rew_main =  3.638305483337767e-07


  2%|▏         | 74985/3000000 [29:25<10:36:43, 76.56it/s]

ep_rew_main =  [-0.06869252]
test_rew_main =  -0.058461755520490044


  3%|▎         | 77988/3000000 [30:39<10:31:46, 77.08it/s]

ep_rew_main =  [-0.00130178]
test_rew_main =  -1.353870194048291


  3%|▎         | 80996/3000000 [31:53<9:17:57, 87.19it/s] 

ep_rew_main =  [-0.05020698]
test_rew_main =  -0.11715350735711245


  3%|▎         | 83992/3000000 [33:07<9:36:08, 84.35it/s]  

ep_rew_main =  [-0.1104038]
test_rew_main =  -0.9916772288875679


  3%|▎         | 86997/3000000 [34:23<9:28:44, 85.36it/s]  

ep_rew_main =  [-0.00362781]
test_rew_main =  -0.34710288135722644


  3%|▎         | 89990/3000000 [35:38<9:28:10, 85.36it/s]  

ep_rew_main =  [2.1684737e-06]
test_rew_main =  -0.022031851659061928
current_time =  2022-02-15T18:23:19.311347


  3%|▎         | 90000/3000000 [36:04<345:07:19,  2.34it/s]

current_time =  2022-02-15T18:23:38.084182


  3%|▎         | 92986/3000000 [37:13<10:54:04, 74.07it/s] 

ep_rew_main =  [-0.01320389]
test_rew_main =  -0.08851358369890386


  3%|▎         | 95980/3000000 [38:28<11:15:36, 71.64it/s]

ep_rew_main =  [0.00041733]


  3%|▎         | 96000/3000000 [38:35<87:01:35,  9.27it/s]

test_rew_main =  -0.3318582531904727


  3%|▎         | 98986/3000000 [39:44<11:01:36, 73.08it/s]

ep_rew_main =  [-0.00671024]
test_rew_main =  3.394086099550719e-08


  3%|▎         | 101983/3000000 [41:00<11:18:39, 71.17it/s]

ep_rew_main =  [5.736211e-07]
test_rew_main =  -0.07992100350241212


  3%|▎         | 104999/3000000 [42:22<10:08:08, 79.34it/s] 

ep_rew_main =  [-0.00652291]
test_rew_main =  -0.0994478605463793


  4%|▎         | 107987/3000000 [43:46<10:14:53, 78.39it/s] 

ep_rew_main =  [-0.18406136]
test_rew_main =  -0.14872165169115864


  4%|▎         | 110994/3000000 [45:12<10:35:29, 75.77it/s] 

ep_rew_main =  [-0.01055019]
test_rew_main =  -0.43645758703152915


  4%|▍         | 113997/3000000 [46:38<9:47:07, 81.92it/s]  

ep_rew_main =  [0.00021885]
test_rew_main =  -0.5038942209574955


  4%|▍         | 116999/3000000 [48:06<9:28:42, 84.49it/s]  

ep_rew_main =  [3.2647188e-07]
test_rew_main =  3.925916848308934e-06


  4%|▍         | 119988/3000000 [49:34<11:14:50, 71.13it/s] 

ep_rew_main =  [-0.01047241]
test_rew_main =  -0.02261446868807108
current_time =  2022-02-15T18:37:16.132108


  4%|▍         | 120000/3000000 [50:02<424:56:58,  1.88it/s]

current_time =  2022-02-15T18:37:36.549493


  4%|▍         | 122982/3000000 [51:21<10:42:57, 74.58it/s] 

ep_rew_main =  [1.5551432e-08]


  4%|▍         | 123000/3000000 [51:29<104:35:24,  7.64it/s]

test_rew_main =  -0.4950570300327133


  4%|▍         | 125991/3000000 [52:49<10:23:34, 76.82it/s] 

ep_rew_main =  [-0.00428429]
test_rew_main =  -0.14378487293198516


  4%|▍         | 128999/3000000 [54:17<10:04:28, 79.16it/s] 

ep_rew_main =  [-0.04972145]
test_rew_main =  -0.016590977047051283


  4%|▍         | 131996/3000000 [55:45<10:09:03, 78.48it/s] 

ep_rew_main =  [1.3970793e-05]
test_rew_main =  3.655291110960412e-05


  4%|▍         | 134985/3000000 [57:14<10:30:35, 75.72it/s] 

ep_rew_main =  [9.880588e-07]
test_rew_main =  3.3204716825467883e-06


  5%|▍         | 137983/3000000 [58:37<11:39:47, 68.16it/s] 

ep_rew_main =  [-0.02480002]
test_rew_main =  -0.22072512167527786


  5%|▍         | 140999/3000000 [59:59<10:31:03, 75.51it/s]

ep_rew_main =  [-0.00567018]
test_rew_main =  -2.259753214818838


  5%|▍         | 143997/3000000 [1:01:48<10:46:36, 73.62it/s] 

ep_rew_main =  [-0.00171548]
test_rew_main =  -1.6886453941579522


  5%|▍         | 146994/3000000 [1:04:02<12:05:25, 65.55it/s] 

ep_rew_main =  [-0.04538861]


  5%|▍         | 147000/3000000 [1:04:14<291:53:00,  2.72it/s]

test_rew_main =  3.6997212843744945e-06


  5%|▍         | 149993/3000000 [1:06:23<12:32:44, 63.10it/s] 

ep_rew_main =  [-0.00830907]


  5%|▍         | 149993/3000000 [1:06:35<12:32:44, 63.10it/s]

test_rew_main =  -0.10334196174930281
current_time =  2022-02-15T18:54:09.888195


  5%|▌         | 150000/3000000 [1:07:07<1022:16:34,  1.29s/it]

current_time =  2022-02-15T18:54:41.342571


  5%|▌         | 152993/3000000 [1:09:20<11:28:58, 68.87it/s]  

ep_rew_main =  [-0.00288388]
test_rew_main =  2.2385897054203832e-07


  5%|▌         | 155998/3000000 [1:11:42<9:41:55, 81.45it/s]  

ep_rew_main =  [-0.00609079]
test_rew_main =  -0.08390710565734558


  5%|▌         | 158983/3000000 [1:13:30<12:36:16, 62.61it/s] 

ep_rew_main =  [1.5098244e-07]


  5%|▌         | 159000/3000000 [1:13:39<129:36:45,  6.09it/s]

test_rew_main =  3.647117559342957e-07


  5%|▌         | 161985/3000000 [1:15:13<12:16:27, 64.23it/s] 

ep_rew_main =  [-0.00423619]
test_rew_main =  5.790340417410888e-08


  5%|▌         | 164980/3000000 [1:16:57<12:33:44, 62.69it/s] 

ep_rew_main =  [5.096028e-07]


  6%|▌         | 165000/3000000 [1:17:06<121:16:25,  6.49it/s]

test_rew_main =  2.718557706665144e-06


  6%|▌         | 167986/3000000 [1:18:45<12:21:55, 63.62it/s] 

ep_rew_main =  [2.7204663e-07]
test_rew_main =  -0.656572010737788


  6%|▌         | 170998/3000000 [1:20:34<10:56:56, 71.77it/s] 

ep_rew_main =  [-0.00112943]
test_rew_main =  -0.0063988863691338636


  6%|▌         | 173998/3000000 [1:22:25<11:43:15, 66.97it/s] 

ep_rew_main =  [-0.01419989]
test_rew_main =  -0.43135215259927184


  6%|▌         | 176986/3000000 [1:24:17<12:30:16, 62.71it/s] 

ep_rew_main =  [-0.00789298]
test_rew_main =  1.035176665047451e-06


  6%|▌         | 179986/3000000 [1:26:14<13:53:50, 56.37it/s] 

ep_rew_main =  [-0.00014733]
test_rew_main =  -0.24198098423115844
current_time =  2022-02-15T19:13:58.716527


  6%|▌         | 180000/3000000 [1:26:50<526:48:26,  1.49it/s]

current_time =  2022-02-15T19:14:24.413387


  6%|▌         | 182995/3000000 [1:28:36<11:32:41, 67.78it/s] 

ep_rew_main =  [-0.00010284]
test_rew_main =  5.670539031093365e-05


  6%|▌         | 185981/3000000 [1:30:33<13:55:35, 56.13it/s] 

ep_rew_main =  [-0.02162925]


  6%|▌         | 186000/3000000 [1:30:42<132:15:23,  5.91it/s]

test_rew_main =  -1.128325312577077


  6%|▋         | 188997/3000000 [1:32:29<10:50:55, 71.97it/s] 

ep_rew_main =  [-0.00048014]
test_rew_main =  2.2106238223392377e-07


  6%|▋         | 191997/3000000 [1:34:31<12:16:20, 63.56it/s] 

ep_rew_main =  [-0.02353915]
test_rew_main =  -0.0661430415185982


  6%|▋         | 194997/3000000 [1:36:31<10:56:11, 71.24it/s] 

ep_rew_main =  [-0.00016761]
test_rew_main =  -0.07095651782930726


  7%|▋         | 197982/3000000 [1:38:04<13:16:32, 58.63it/s] 

ep_rew_main =  [9.775336e-08]
test_rew_main =  0.00022276888362119364


  7%|▋         | 200986/3000000 [1:39:36<12:47:56, 60.75it/s] 

ep_rew_main =  [2.8752822e-07]
test_rew_main =  -0.07217931097153252


  7%|▋         | 203981/3000000 [1:41:10<13:25:29, 57.85it/s] 

ep_rew_main =  [-0.18652263]


  7%|▋         | 204000/3000000 [1:41:16<86:39:05,  8.96it/s]

test_rew_main =  6.833764658598085e-09


  7%|▋         | 206998/3000000 [1:42:43<11:20:41, 68.39it/s]

ep_rew_main =  [-0.00081346]
test_rew_main =  1.334324362067818e-07


  7%|▋         | 209995/3000000 [1:44:16<11:12:59, 69.10it/s] 

ep_rew_main =  [-0.00082612]
test_rew_main =  1.0943314484072412e-08
current_time =  2022-02-15T19:31:57.512391


  7%|▋         | 210000/3000000 [1:44:43<403:03:42,  1.92it/s]

current_time =  2022-02-15T19:32:16.989327


  7%|▋         | 212998/3000000 [1:46:12<11:22:20, 68.07it/s] 

ep_rew_main =  [-0.00305875]
test_rew_main =  -0.01662435393882803


  7%|▋         | 215978/3000000 [1:47:45<12:43:00, 60.81it/s] 

ep_rew_main =  [-0.0018528]


  7%|▋         | 216000/3000000 [1:47:52<78:16:20,  9.88it/s]

test_rew_main =  -0.5557959853619916


  7%|▋         | 218983/3000000 [1:49:17<12:55:46, 59.75it/s]

ep_rew_main =  [-1.898857e-05]
test_rew_main =  -0.43188330562550414


  7%|▋         | 221997/3000000 [1:50:49<11:13:53, 68.71it/s] 

ep_rew_main =  [0.0001203]
test_rew_main =  -1.109719040446777


  7%|▋         | 224986/3000000 [1:52:21<12:41:28, 60.74it/s] 

ep_rew_main =  [-0.03233175]
test_rew_main =  -0.32123312590976755


  8%|▊         | 227997/3000000 [1:53:56<11:08:06, 69.15it/s] 

ep_rew_main =  [-0.01532909]
test_rew_main =  -0.20108151001076685


  8%|▊         | 230981/3000000 [1:55:29<12:58:06, 59.31it/s] 

ep_rew_main =  [1.199531e-06]
test_rew_main =  -0.6684618209852353


  8%|▊         | 233999/3000000 [1:57:03<11:09:25, 68.86it/s] 

ep_rew_main =  [0.00026631]
test_rew_main =  -4.3722436762873915


  8%|▊         | 236992/3000000 [1:58:38<10:40:16, 71.92it/s] 

ep_rew_main =  [-0.01170316]
test_rew_main =  -1.3987755437101803


  8%|▊         | 239993/3000000 [2:00:15<10:57:18, 69.98it/s] 

ep_rew_main =  [-0.02961257]
test_rew_main =  -0.013118292897273348
current_time =  2022-02-15T19:47:56.252585


  8%|▊         | 240000/3000000 [2:00:41<347:26:12,  2.21it/s]

current_time =  2022-02-15T19:48:14.988777


  8%|▊         | 242982/3000000 [2:02:18<13:30:55, 56.66it/s] 

ep_rew_main =  [-0.00010303]


  8%|▊         | 243000/3000000 [2:02:26<118:40:47,  6.45it/s]

test_rew_main =  -0.37443882158027475


  8%|▊         | 245989/3000000 [2:04:01<12:22:25, 61.82it/s] 

ep_rew_main =  [0.00010132]
test_rew_main =  -1.7027464541290098


  8%|▊         | 248991/3000000 [2:05:47<12:33:46, 60.83it/s] 

ep_rew_main =  [-0.00312571]
test_rew_main =  -1.2055234933312664


  8%|▊         | 251995/3000000 [2:07:31<11:33:12, 66.07it/s] 

ep_rew_main =  [-0.00808832]
test_rew_main =  1.2804020790354217e-06


  8%|▊         | 254990/3000000 [2:09:12<11:29:22, 66.37it/s] 

ep_rew_main =  [-0.03011222]
test_rew_main =  -0.29715077765539694


  9%|▊         | 257998/3000000 [2:10:54<11:24:33, 66.76it/s] 

ep_rew_main =  [-0.00077594]
test_rew_main =  -1.5495698383133292


  9%|▊         | 260985/3000000 [2:12:37<13:53:09, 54.79it/s] 

ep_rew_main =  [-0.02339916]
test_rew_main =  -0.4117551794119053


  9%|▉         | 263995/3000000 [2:14:17<10:58:42, 69.23it/s] 

ep_rew_main =  [-0.00370678]
test_rew_main =  -0.38379166453391156


  9%|▉         | 266993/3000000 [2:16:02<11:12:54, 67.69it/s] 

ep_rew_main =  [-0.0033289]
test_rew_main =  -0.1679059605274254


  9%|▉         | 269983/3000000 [2:17:44<13:39:33, 55.52it/s] 

ep_rew_main =  [-0.0040376]
test_rew_main =  -0.3170376589639511
current_time =  2022-02-15T20:05:25.522020


  9%|▉         | 270000/3000000 [2:18:11<300:31:54,  2.52it/s]

current_time =  2022-02-15T20:05:44.929044


  9%|▉         | 272993/3000000 [2:19:47<11:41:49, 64.76it/s] 

ep_rew_main =  [1.6299893e-07]
test_rew_main =  -0.6004587568381641


  9%|▉         | 275993/3000000 [2:21:30<11:39:02, 64.95it/s] 

ep_rew_main =  [-0.03731632]
test_rew_main =  -2.231291312697617


  9%|▉         | 278981/3000000 [2:23:14<13:56:57, 54.19it/s] 

ep_rew_main =  [-0.0094991]
test_rew_main =  -0.896620270359576


  9%|▉         | 281985/3000000 [2:24:58<14:31:55, 51.95it/s] 

ep_rew_main =  [-0.00040196]
test_rew_main =  -1.2578239178221282


  9%|▉         | 284981/3000000 [2:26:43<14:48:45, 50.91it/s] 

ep_rew_main =  [-0.00024093]


 10%|▉         | 285000/3000000 [2:26:50<85:29:21,  8.82it/s]

test_rew_main =  -3.72978540454614


 10%|▉         | 287994/3000000 [2:28:28<11:54:22, 63.27it/s]

ep_rew_main =  [2.9703651e-05]
test_rew_main =  -1.0667572625543662


 10%|▉         | 290994/3000000 [2:30:14<11:43:00, 64.22it/s] 

ep_rew_main =  [-0.00288793]
test_rew_main =  -0.07605269740017485


 10%|▉         | 293989/3000000 [2:32:04<12:14:45, 61.38it/s] 

ep_rew_main =  [-0.04117192]
test_rew_main =  -0.052184055969386675


 10%|▉         | 296998/3000000 [2:33:51<11:49:01, 63.54it/s] 

ep_rew_main =  [-0.00048693]
test_rew_main =  -0.5113986145775973


 10%|▉         | 299995/3000000 [2:35:38<12:03:15, 62.22it/s] 

ep_rew_main =  [-0.04348731]
test_rew_main =  -0.26718644603549013
current_time =  2022-02-15T20:23:19.579712


 10%|█         | 300000/3000000 [2:36:04<357:55:51,  2.10it/s]

current_time =  2022-02-15T20:23:38.302097


 10%|█         | 302996/3000000 [2:37:45<11:45:13, 63.74it/s] 

ep_rew_main =  [-0.00866403]
test_rew_main =  -0.28731445577839415


 10%|█         | 305996/3000000 [2:39:34<12:04:48, 61.95it/s] 

ep_rew_main =  [-0.01178707]
test_rew_main =  -0.5334636471842049


 10%|█         | 308996/3000000 [2:41:22<11:45:59, 63.53it/s] 

ep_rew_main =  [-0.01140404]
test_rew_main =  -0.318167166636607


 10%|█         | 311995/3000000 [2:43:13<12:01:40, 62.08it/s] 

ep_rew_main =  [-0.0108533]
test_rew_main =  -0.0017572732006228965


 10%|█         | 314984/3000000 [2:45:06<15:15:03, 48.90it/s] 

ep_rew_main =  [1.6334869e-05]
test_rew_main =  -0.013543715244808425


 11%|█         | 317992/3000000 [2:46:59<12:08:22, 61.37it/s] 

ep_rew_main =  [-0.01440912]
test_rew_main =  -0.24409608176303016


 11%|█         | 320979/3000000 [2:48:53<15:48:37, 47.07it/s] 

ep_rew_main =  [-0.05747803]


 11%|█         | 321000/3000000 [2:49:01<85:51:34,  8.67it/s]

test_rew_main =  -0.2968420574777736


 11%|█         | 323988/3000000 [2:50:45<12:27:28, 59.67it/s] 

ep_rew_main =  [-0.09427062]
test_rew_main =  -0.16205855864812074


 11%|█         | 326990/3000000 [2:52:38<12:22:17, 60.02it/s] 

ep_rew_main =  [-0.01051797]
test_rew_main =  -0.2067565652879247


 11%|█         | 329997/3000000 [2:54:33<12:00:45, 61.74it/s] 

ep_rew_main =  [-0.03904284]
test_rew_main =  -0.4470440076371561
current_time =  2022-02-15T20:42:14.241174


 11%|█         | 330000/3000000 [2:54:59<367:50:11,  2.02it/s]

current_time =  2022-02-15T20:42:33.061979


 11%|█         | 332990/3000000 [2:56:47<12:50:21, 57.70it/s] 

ep_rew_main =  [-0.00876159]
test_rew_main =  -0.1723414966680414


 11%|█         | 335984/3000000 [2:58:43<15:33:22, 47.57it/s] 

ep_rew_main =  [-0.00082213]
test_rew_main =  -1.3269475222304532


 11%|█▏        | 338985/3000000 [3:00:38<15:56:31, 46.37it/s] 

ep_rew_main =  [-0.0126372]
test_rew_main =  -1.68634785855666


 11%|█▏        | 341990/3000000 [3:02:35<12:18:48, 59.96it/s] 

ep_rew_main =  [-0.00140861]
test_rew_main =  -2.4018498541168904


 11%|█▏        | 344990/3000000 [3:04:33<12:37:52, 58.39it/s] 

ep_rew_main =  [-0.02736486]
test_rew_main =  -3.003894708257911


 12%|█▏        | 347983/3000000 [3:06:31<15:24:54, 47.79it/s] 

ep_rew_main =  [2.8079841e-05]


 12%|█▏        | 348000/3000000 [3:06:39<116:54:12,  6.30it/s]

test_rew_main =  -5.589109402045663


 12%|█▏        | 350988/3000000 [3:08:32<16:10:58, 45.47it/s] 

ep_rew_main =  [6.321012e-05]
test_rew_main =  -0.24978164687411444


 12%|█▏        | 353982/3000000 [3:10:30<15:53:55, 46.23it/s] 

ep_rew_main =  [-0.00338115]
test_rew_main =  -1.8622438952876308


 12%|█▏        | 356987/3000000 [3:12:28<13:53:51, 52.83it/s] 

ep_rew_main =  [-1.9181918e-05]
test_rew_main =  -0.32379777808029647


 12%|█▏        | 359983/3000000 [3:14:27<16:20:55, 44.86it/s] 

ep_rew_main =  [-0.00126788]
test_rew_main =  -0.16872669880833435
current_time =  2022-02-15T21:02:08.692401


 12%|█▏        | 360000/3000000 [3:14:53<297:33:16,  2.46it/s]

current_time =  2022-02-15T21:02:27.595694


 12%|█▏        | 362998/3000000 [3:16:46<12:02:26, 60.84it/s] 

ep_rew_main =  [-2.509456e-05]
test_rew_main =  -0.002994031727168353


 12%|█▏        | 365998/3000000 [3:18:46<12:31:07, 58.45it/s] 

ep_rew_main =  [-0.00381386]
test_rew_main =  -0.010474321602242567


 12%|█▏        | 368988/3000000 [3:20:46<15:52:04, 46.06it/s] 

ep_rew_main =  [-0.00293395]
test_rew_main =  -0.3801488322107792


 12%|█▏        | 371997/3000000 [3:22:48<12:56:01, 56.44it/s] 

ep_rew_main =  [-0.00422828]
test_rew_main =  -0.14119062832499402


 12%|█▏        | 374999/3000000 [3:24:50<12:53:43, 56.54it/s] 

ep_rew_main =  [0.00018093]
test_rew_main =  -2.2904871445750787


 13%|█▎        | 377995/3000000 [3:26:51<13:17:07, 54.82it/s] 

ep_rew_main =  [0.00043428]
test_rew_main =  -0.5531947736424778


 13%|█▎        | 380998/3000000 [3:28:53<12:29:04, 58.27it/s] 

ep_rew_main =  [-0.09464352]
test_rew_main =  -1.5906177523038836


 13%|█▎        | 383981/3000000 [3:30:55<17:11:07, 42.28it/s] 

ep_rew_main =  [-0.01845482]


 13%|█▎        | 384000/3000000 [3:31:02<85:36:55,  8.49it/s]

test_rew_main =  -0.36874071741028364


 13%|█▎        | 386999/3000000 [3:36:46<12:56:39, 56.07it/s]  

ep_rew_main =  [-0.00306072]
test_rew_main =  -0.039271323585396783


 13%|█▎        | 389997/3000000 [3:38:49<12:34:46, 57.63it/s] 

ep_rew_main =  [-0.00597202]
test_rew_main =  -0.22467485796074665
current_time =  2022-02-15T21:26:29.956266


 13%|█▎        | 390000/3000000 [3:39:14<333:38:38,  2.17it/s]

current_time =  2022-02-15T21:26:48.806848


 13%|█▎        | 392990/3000000 [3:41:12<15:48:36, 45.80it/s] 

ep_rew_main =  [-0.00169103]
test_rew_main =  -0.5062022389603464


 13%|█▎        | 395978/3000000 [3:43:16<16:05:55, 44.93it/s] 

ep_rew_main =  [-0.02033794]


 13%|█▎        | 396000/3000000 [3:43:23<74:35:16,  9.70it/s]

test_rew_main =  -1.0370785265394549


 13%|█▎        | 398985/3000000 [3:45:21<15:18:15, 47.21it/s]

ep_rew_main =  [-0.00836324]
test_rew_main =  -1.7090534971943772


 13%|█▎        | 401991/3000000 [3:47:26<15:54:25, 45.37it/s] 

ep_rew_main =  [0.0002237]
test_rew_main =  -0.5501146526311336


 13%|█▎        | 404999/3000000 [3:49:33<13:22:15, 53.91it/s] 

ep_rew_main =  [3.871575e-05]
test_rew_main =  -1.2246293653822737


 14%|█▎        | 407995/3000000 [3:51:40<13:43:58, 52.43it/s] 

ep_rew_main =  [-0.00821566]
test_rew_main =  -1.1600453487775364


 14%|█▎        | 410991/3000000 [3:53:47<16:15:15, 44.24it/s] 

ep_rew_main =  [-0.01321308]
test_rew_main =  -0.7898365762535529


 14%|█▍        | 413996/3000000 [3:55:56<13:05:48, 54.85it/s] 

ep_rew_main =  [-0.01079163]
test_rew_main =  1.5861754179190187e-06


 14%|█▍        | 416999/3000000 [3:58:05<12:49:20, 55.96it/s] 

ep_rew_main =  [-0.00064366]
test_rew_main =  1.7506915578639298e-07


 14%|█▍        | 419994/3000000 [4:00:15<13:41:00, 52.37it/s] 

ep_rew_main =  [7.7591676e-07]
test_rew_main =  4.4172065542221385e-07
current_time =  2022-02-15T21:47:56.285324


 14%|█▍        | 420000/3000000 [4:00:41<330:14:03,  2.17it/s]

current_time =  2022-02-15T21:48:15.161100


 14%|█▍        | 422987/3000000 [4:02:44<16:30:42, 43.35it/s] 

ep_rew_main =  [4.4248966e-07]
test_rew_main =  -0.09218654441305954


 14%|█▍        | 425976/3000000 [4:04:54<17:22:32, 41.15it/s] 

ep_rew_main =  [3.1010268e-06]


 14%|█▍        | 426000/3000000 [4:05:02<81:11:39,  8.81it/s]

test_rew_main =  -0.1664366843819018


 14%|█▍        | 428997/3000000 [4:07:06<14:03:38, 50.79it/s]

ep_rew_main =  [-8.006866e-05]
test_rew_main =  -0.1681465879117498


 14%|█▍        | 431993/3000000 [4:09:18<13:20:36, 53.46it/s] 

ep_rew_main =  [6.01587e-07]
test_rew_main =  -0.28471769051395296


 14%|█▍        | 434981/3000000 [4:11:30<16:58:38, 41.97it/s] 

ep_rew_main =  [-0.00289711]
test_rew_main =  3.492307318247641e-06


 15%|█▍        | 437982/3000000 [4:13:44<17:22:12, 40.97it/s] 

ep_rew_main =  [-0.00225787]
test_rew_main =  1.773369985040259e-06


 15%|█▍        | 440993/3000000 [4:15:57<13:40:51, 51.96it/s] 

ep_rew_main =  [1.1591198e-06]
test_rew_main =  5.1040885915116283e-05


 15%|█▍        | 443994/3000000 [4:18:12<13:49:15, 51.37it/s] 

ep_rew_main =  [-0.0004233]
test_rew_main =  -0.4276505038803887


 15%|█▍        | 446977/3000000 [4:20:27<17:21:37, 40.85it/s] 

ep_rew_main =  [-0.00213976]


 15%|█▍        | 447000/3000000 [4:20:34<79:32:08,  8.92it/s]

test_rew_main =  1.5490571902521047e-05


 15%|█▍        | 449998/3000000 [4:22:49<13:13:53, 53.53it/s]

ep_rew_main =  [5.1102164e-05]
test_rew_main =  3.3665100988764e-05
current_time =  2022-02-15T22:10:30.146437


 15%|█▌        | 450000/3000000 [4:23:14<326:59:49,  2.17it/s]

current_time =  2022-02-15T22:10:48.873646


 15%|█▌        | 452977/3000000 [4:25:24<17:44:28, 39.88it/s] 

ep_rew_main =  [2.8569834e-06]


 15%|█▌        | 453000/3000000 [4:25:33<89:17:38,  7.92it/s]

test_rew_main =  -0.045314317675322915


 15%|█▌        | 455999/3000000 [4:27:43<13:19:19, 53.04it/s] 

ep_rew_main =  [1.4076165e-06]
test_rew_main =  -0.11165487219997539


 15%|█▌        | 458984/3000000 [4:30:01<17:08:46, 41.17it/s] 

ep_rew_main =  [5.2778156e-05]
test_rew_main =  -0.06571954456283575


 15%|█▌        | 461993/3000000 [4:32:20<17:13:01, 40.95it/s] 

ep_rew_main =  [-0.01208147]
test_rew_main =  -0.07703006872803678


 15%|█▌        | 464998/3000000 [4:34:38<14:37:04, 48.17it/s] 

ep_rew_main =  [-0.00054194]
test_rew_main =  -0.6120123428349757


 16%|█▌        | 467978/3000000 [4:36:57<17:03:39, 41.23it/s] 

ep_rew_main =  [1.5381346e-07]


 16%|█▌        | 468000/3000000 [4:37:04<77:53:13,  9.03it/s]

test_rew_main =  -0.18425716934866082


 16%|█▌        | 470988/3000000 [4:39:17<18:06:05, 38.81it/s]

ep_rew_main =  [-0.00382228]
test_rew_main =  -0.8692997163928243


 16%|█▌        | 473987/3000000 [4:41:38<17:50:37, 39.32it/s] 

ep_rew_main =  [-0.01510622]
test_rew_main =  -0.2051573661696732


 16%|█▌        | 476988/3000000 [4:44:00<17:53:24, 39.17it/s] 

ep_rew_main =  [8.6543247e-07]
test_rew_main =  6.070129152108657e-06


 16%|█▌        | 479993/3000000 [4:46:23<17:51:51, 39.18it/s] 

ep_rew_main =  [0.0002873]
test_rew_main =  0.0008590110570719178
current_time =  2022-02-15T22:34:04.614846


 16%|█▌        | 480000/3000000 [4:46:49<300:04:30,  2.33it/s]

current_time =  2022-02-15T22:34:23.463257


 16%|█▌        | 482982/3000000 [4:49:05<17:54:39, 39.04it/s] 

ep_rew_main =  [0.00044099]
test_rew_main =  -0.04198852505125414


 16%|█▌        | 485975/3000000 [4:51:29<18:51:04, 37.04it/s] 

ep_rew_main =  [1.5745258e-05]


 16%|█▌        | 486000/3000000 [4:51:36<75:03:04,  9.30it/s]

test_rew_main =  1.4120055979264617e-08


 16%|█▋        | 488988/3000000 [4:53:52<18:22:47, 37.95it/s]

ep_rew_main =  [-0.03130187]
test_rew_main =  9.90777883143797e-08


 16%|█▋        | 491999/3000000 [4:56:17<13:41:43, 50.87it/s] 

ep_rew_main =  [-0.00937475]
test_rew_main =  -0.20041995031312165


 16%|█▋        | 494993/3000000 [4:58:42<15:05:38, 46.10it/s]

ep_rew_main =  [-0.00961774]
test_rew_main =  -0.158720145334475


 17%|█▋        | 497991/3000000 [5:01:07<17:14:39, 40.30it/s] 

ep_rew_main =  [-0.01596928]
test_rew_main =  -0.1521787960568994


 17%|█▋        | 500998/3000000 [5:03:33<14:52:13, 46.68it/s] 

ep_rew_main =  [-0.00198229]
test_rew_main =  -0.46209189150274554


 17%|█▋        | 503993/3000000 [5:06:04<14:41:29, 47.19it/s] 

ep_rew_main =  [-0.00132921]
test_rew_main =  -0.3553765397645233


 17%|█▋        | 506989/3000000 [5:08:32<18:29:30, 37.45it/s] 

ep_rew_main =  [5.904077e-07]
test_rew_main =  2.448358248503683e-06


 17%|█▋        | 509981/3000000 [5:11:00<17:55:50, 38.57it/s] 

ep_rew_main =  [-0.00108621]
test_rew_main =  -0.033454629210251025
current_time =  2022-02-15T22:58:41.974641


 17%|█▋        | 510000/3000000 [5:11:26<248:44:12,  2.78it/s]

current_time =  2022-02-15T22:59:00.828725


 17%|█▋        | 512990/3000000 [5:13:51<18:36:20, 37.13it/s] 

ep_rew_main =  [-0.00113393]
test_rew_main =  -0.021974361185996213


 17%|█▋        | 515993/3000000 [5:16:21<17:44:43, 38.88it/s] 

ep_rew_main =  [-0.0055322]
test_rew_main =  6.233445594658881e-07


 17%|█▋        | 518995/3000000 [5:18:51<18:14:52, 37.77it/s] 

ep_rew_main =  [0.00024052]
test_rew_main =  2.084677659477537e-05


 17%|█▋        | 521991/3000000 [5:21:22<18:35:07, 37.04it/s] 

ep_rew_main =  [1.34318925e-05]
test_rew_main =  -0.0006569440765138685


 17%|█▋        | 524975/3000000 [5:23:55<19:24:10, 35.43it/s] 

ep_rew_main =  [1.7368645e-08]


 18%|█▊        | 525000/3000000 [5:24:02<67:25:48, 10.20it/s]

test_rew_main =  -0.006875075696051218


 18%|█▊        | 527996/3000000 [5:26:28<15:30:42, 44.27it/s]

ep_rew_main =  [-0.00301019]
test_rew_main =  -0.0018896640538755272


 18%|█▊        | 530984/3000000 [5:29:02<18:16:55, 37.51it/s] 

ep_rew_main =  [2.6406426e-06]
test_rew_main =  5.886873617366728e-07


 18%|█▊        | 533985/3000000 [5:31:36<19:49:57, 34.54it/s] 

ep_rew_main =  [1.02674676e-07]
test_rew_main =  6.899328485550084e-07


 18%|█▊        | 536977/3000000 [5:34:11<19:31:46, 35.03it/s] 

ep_rew_main =  [4.7381914e-08]


 18%|█▊        | 537000/3000000 [5:34:19<79:55:23,  8.56it/s]

test_rew_main =  6.488478532921487e-08


 18%|█▊        | 539990/3000000 [5:36:47<18:50:38, 36.26it/s] 

ep_rew_main =  [1.0761229e-07]
test_rew_main =  9.836066537241006e-07
current_time =  2022-02-15T23:24:28.344302


 18%|█▊        | 540000/3000000 [5:37:13<276:40:43,  2.47it/s]

current_time =  2022-02-15T23:24:47.052135


 18%|█▊        | 542984/3000000 [5:39:43<19:49:23, 34.43it/s] 

ep_rew_main =  [-9.325161e-05]
test_rew_main =  -0.1886330648595588


 18%|█▊        | 545992/3000000 [5:42:18<18:25:12, 37.01it/s] 

ep_rew_main =  [7.20999e-08]
test_rew_main =  -0.6790491917015458


 18%|█▊        | 548998/3000000 [5:44:55<18:09:48, 37.48it/s] 

ep_rew_main =  [-0.00254091]
test_rew_main =  -0.6497891355748753


 18%|█▊        | 551992/3000000 [5:47:32<16:21:15, 41.58it/s] 

ep_rew_main =  [-0.00022717]
test_rew_main =  -0.06456555653096764


 18%|█▊        | 554995/3000000 [5:50:10<15:10:19, 44.76it/s] 

ep_rew_main =  [-0.0020278]
test_rew_main =  -0.4277479157931888


 19%|█▊        | 557987/3000000 [5:52:49<19:30:24, 34.77it/s] 

ep_rew_main =  [6.033632e-05]
test_rew_main =  -0.06584210572577484


 19%|█▊        | 560998/3000000 [5:55:28<15:38:46, 43.30it/s] 

ep_rew_main =  [-0.00172703]
test_rew_main =  -0.05138878193978301


 19%|█▉        | 563998/3000000 [5:58:13<15:12:42, 44.48it/s] 

ep_rew_main =  [-0.00403569]
test_rew_main =  -0.5181836615050047


 19%|█▉        | 566991/3000000 [6:01:04<16:10:10, 41.80it/s] 

ep_rew_main =  [-6.443609e-06]
test_rew_main =  -0.1171768959966036


 19%|█▉        | 569993/3000000 [6:03:50<16:12:23, 41.65it/s] 

ep_rew_main =  [-0.0005694]
test_rew_main =  -0.37328191988194687
current_time =  2022-02-15T23:51:31.195340


 19%|█▉        | 570000/3000000 [6:04:16<315:14:36,  2.14it/s]

current_time =  2022-02-15T23:51:50.578425


 19%|█▉        | 572985/3000000 [6:06:53<20:03:57, 33.60it/s] 

ep_rew_main =  [-0.00674162]
test_rew_main =  -0.05805563895909134


 19%|█▉        | 575988/3000000 [6:09:36<19:56:32, 33.76it/s] 

ep_rew_main =  [-0.01333329]
test_rew_main =  -1.0263156535547624


 19%|█▉        | 578989/3000000 [6:12:19<19:57:16, 33.70it/s] 

ep_rew_main =  [1.1884703e-07]
test_rew_main =  -0.03902131687990492


 19%|█▉        | 581999/3000000 [6:15:02<14:59:23, 44.81it/s] 

ep_rew_main =  [-0.00163587]
test_rew_main =  -0.8591578977178547


 19%|█▉        | 584995/3000000 [6:17:51<15:50:43, 42.34it/s] 

ep_rew_main =  [2.0765962e-07]
test_rew_main =  -0.08840015559567685


 20%|█▉        | 587995/3000000 [6:20:40<16:20:42, 40.99it/s] 

ep_rew_main =  [-0.004683]
test_rew_main =  -0.2392518904577306


 20%|█▉        | 590979/3000000 [6:23:31<20:18:09, 32.96it/s] 

ep_rew_main =  [2.3045303e-07]


 20%|█▉        | 591000/3000000 [6:23:39<84:46:58,  7.89it/s]

test_rew_main =  -0.016869573920367628


 20%|█▉        | 593981/3000000 [6:26:22<21:20:19, 31.32it/s] 

ep_rew_main =  [-0.00417175]
test_rew_main =  -0.4583578453314578


 20%|█▉        | 596999/3000000 [6:29:13<15:56:37, 41.87it/s] 

ep_rew_main =  [4.7397975e-06]
test_rew_main =  -0.0026028733617309463


 20%|█▉        | 599987/3000000 [6:32:04<20:31:43, 32.48it/s] 

ep_rew_main =  [1.0589659e-08]
test_rew_main =  -0.06130090306481657
current_time =  2022-02-16T00:19:45.824382


 20%|██        | 600000/3000000 [6:32:29<269:40:47,  2.47it/s]

current_time =  2022-02-16T00:20:03.593965


 20%|██        | 602983/3000000 [6:35:14<20:55:00, 31.83it/s] 

ep_rew_main =  [-0.00028619]
test_rew_main =  -0.005335795218452222


 20%|██        | 605992/3000000 [6:38:08<19:49:25, 33.55it/s] 

ep_rew_main =  [-0.00231671]
test_rew_main =  -0.47011874034703327


 20%|██        | 608981/3000000 [6:41:02<21:57:02, 30.26it/s] 

ep_rew_main =  [-0.00077618]


 20%|██        | 609000/3000000 [6:41:10<93:19:39,  7.12it/s]

test_rew_main =  -0.6088103866520816


 20%|██        | 611992/3000000 [6:43:55<16:43:06, 39.68it/s] 

ep_rew_main =  [-0.02284953]
test_rew_main =  -1.5181355215263375


 20%|██        | 614995/3000000 [6:46:50<16:44:33, 39.57it/s] 

ep_rew_main =  [-0.00039768]
test_rew_main =  -2.5751834919593866


 21%|██        | 617984/3000000 [6:49:46<20:51:50, 31.71it/s] 

ep_rew_main =  [-0.00253545]
test_rew_main =  -1.4935191812534332


 21%|██        | 620989/3000000 [6:52:39<19:37:30, 33.67it/s] 

ep_rew_main =  [-0.00206527]
test_rew_main =  -3.664069787592325


 21%|██        | 623985/3000000 [6:55:35<21:47:06, 30.30it/s] 

ep_rew_main =  [-0.00103856]
test_rew_main =  -0.025096748512288835


 21%|██        | 626999/3000000 [6:58:32<16:56:03, 38.92it/s] 

ep_rew_main =  [-0.00035202]
test_rew_main =  -1.2442815585337685


 21%|██        | 629994/3000000 [7:01:32<16:46:35, 39.24it/s] 

ep_rew_main =  [-0.00732953]
test_rew_main =  -0.11769842012797267
current_time =  2022-02-16T00:49:13.136808


 21%|██        | 630000/3000000 [7:01:59<316:23:49,  2.08it/s]

current_time =  2022-02-16T00:49:33.545930


 21%|██        | 632996/3000000 [7:04:52<16:32:13, 39.76it/s] 

ep_rew_main =  [-0.00540091]
test_rew_main =  1.5041998947279054e-06


 21%|██        | 635986/3000000 [7:07:52<21:12:21, 30.97it/s] 

ep_rew_main =  [-0.00119224]
test_rew_main =  -0.09238762042346817


 21%|██▏       | 638989/3000000 [7:10:52<20:45:02, 31.61it/s] 

ep_rew_main =  [5.9148482e-05]
test_rew_main =  6.906645124906597e-05


 21%|██▏       | 641985/3000000 [7:13:52<21:44:39, 30.12it/s] 

ep_rew_main =  [-0.00290001]
test_rew_main =  -0.32272548045845456


 21%|██▏       | 644995/3000000 [7:16:54<18:12:35, 35.92it/s] 

ep_rew_main =  [-0.0015175]
test_rew_main =  -0.010119109961721025


 22%|██▏       | 647998/3000000 [7:19:55<16:20:54, 39.96it/s] 

ep_rew_main =  [0.00049598]
test_rew_main =  -1.0035530659476568


 22%|██▏       | 650999/3000000 [7:22:58<16:20:48, 39.92it/s] 

ep_rew_main =  [0.00010859]
test_rew_main =  -0.09004238717170251


 22%|██▏       | 653994/3000000 [7:26:01<20:34:00, 31.69it/s] 

ep_rew_main =  [0.00018893]
test_rew_main =  -0.7910429017911313


 22%|██▏       | 656997/3000000 [7:29:05<17:38:59, 36.87it/s] 

ep_rew_main =  [5.1981046e-05]
test_rew_main =  -0.3303288000583512


 22%|██▏       | 659980/3000000 [7:32:10<22:18:23, 29.14it/s] 

ep_rew_main =  [6.509603e-07]
test_rew_main =  -0.7802361044103605
current_time =  2022-02-16T01:19:51.052519


 22%|██▏       | 660000/3000000 [7:32:36<243:39:52,  2.67it/s]

current_time =  2022-02-16T01:20:10.431137


 22%|██▏       | 662998/3000000 [7:35:34<17:21:22, 37.40it/s] 

ep_rew_main =  [6.287421e-07]
test_rew_main =  -1.0256685018777147


 22%|██▏       | 665990/3000000 [7:38:40<21:52:33, 29.64it/s] 

ep_rew_main =  [2.8832414e-08]
test_rew_main =  1.6345958527086423e-07


 22%|██▏       | 668984/3000000 [7:41:47<21:57:52, 29.48it/s] 

ep_rew_main =  [8.160594e-08]
test_rew_main =  2.886436633131595e-08


 22%|██▏       | 671986/3000000 [7:44:54<21:32:55, 30.01it/s] 

ep_rew_main =  [1.6700756e-06]
test_rew_main =  2.1600790914274207e-07


 22%|██▏       | 674980/3000000 [7:48:01<22:13:25, 29.06it/s] 

ep_rew_main =  [8.793909e-07]
test_rew_main =  1.1289196235016636e-08


 23%|██▎       | 677985/3000000 [7:51:09<23:02:58, 27.98it/s] 

ep_rew_main =  [4.6305132e-07]
test_rew_main =  -0.0020985806275697367


 23%|██▎       | 680996/3000000 [7:54:16<16:40:11, 38.64it/s] 

ep_rew_main =  [5.7081793e-09]
test_rew_main =  4.997154470492611e-06


 23%|██▎       | 683980/3000000 [7:57:26<22:08:23, 29.06it/s] 

ep_rew_main =  [-0.00082072]
test_rew_main =  2.3174848541381484e-08


 23%|██▎       | 686992/3000000 [8:00:36<17:25:46, 36.86it/s] 

ep_rew_main =  [0.00012983]
test_rew_main =  -0.01920481995895999


 23%|██▎       | 689991/3000000 [8:03:55<17:53:01, 35.88it/s] 

ep_rew_main =  [-0.00910262]
test_rew_main =  -0.05861846452545453
current_time =  2022-02-16T01:51:36.607126


 23%|██▎       | 690000/3000000 [8:04:26<410:43:59,  1.56it/s]

current_time =  2022-02-16T01:52:00.091995


 23%|██▎       | 692988/3000000 [8:07:45<19:20:47, 33.12it/s] 

ep_rew_main =  [-0.00122335]
test_rew_main =  0.00027624494367906624


 23%|██▎       | 693700/3000000 [8:08:46<27:04:59, 23.65it/s] 


KeyboardInterrupt: 

Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.


All 1 operations synced, thanks for waiting!


In [None]:
model = ac.q
print("Model_q's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

model = ac.pi
print("Model_pi's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

In [None]:
print("pi_optimizer's state_dict:")
for var_name in pi_optimizer.state_dict():
    print(var_name, "\t", pi_optimizer.state_dict()[var_name])

print("q_optimizer's state_dict:")
for var_name in q_optimizer.state_dict():
    print(var_name, "\t", q_optimizer.state_dict()[var_name])

In [None]:
now = datetime.now()

current_time = str(now.isoformat())



torch.save({
            'model of ac.q': ac.q.state_dict(),
            'model of ac.pi': ac.pi.state_dict(),
            'q_optimizer_state_dict': q_optimizer.state_dict(),
            'pi_optimizer_state_dict': pi_optimizer.state_dict(),
            
            }, "model_nn/model_nn_%s.pt" % current_time)



In [None]:
nep_log.stop()