In [1]:
#!cat ~/.bashrc

In [2]:
#!mv ../drone_dataset.pkl .

In [3]:
#!pip3 install --upgrade protobuf==3.20.0 

In [4]:
#!pip3 install transformers==4.5.1
#!pip3 install -U tokenizers
# Uninstalling tokenizers (the command below) solves many of the problems.
#!pip3 uninstall tokenizers -y

In [5]:
from torch.utils.tensorboard import SummaryWriter
import argparse
import pickle
import random
import time
import gym
import d4rl
import torch
import numpy as np

import utils
from replay_buffer import ReplayBuffer
from lamb import Lamb
from stable_baselines3.common.vec_env import SubprocVecEnv
from pathlib import Path
from data import create_dataloader
from decision_transformer.models.decision_transformer import DecisionTransformer
from evaluation import create_vec_eval_episodes_fn, vec_evaluate_episode_rtg
from trainer import SequenceTrainer
from logger import Logger

from env import make_pytorch_env

#MAX_EPISODE_LEN = 2000 # 4000 # 2000 # 4000 # Warning: there is a similar variable in data.py! 

pybullet build time: May 20 2022 19:44:17


In [6]:
import sys

# Blank out argv so that parser.parse_args() below only ever sees the defaults
# declared in this cell (presumably to hide the notebook kernel's own
# command-line arguments -- confirm if this cell is reused as a script).
sys.argv = ['']


def _str2bool(value):
    """Convert a command-line token to a bool.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True`` because
    every non-empty string is truthy. This converter accepts the usual spellings
    and rejects anything else with a proper argparse error.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=10)
parser.add_argument("--env", type=str, default="drone_dataset")

# model options
# Shared default for both the training and the evaluation context length.
context_length = 80 #1200 # 1500 # 550 #330
## Training Context Length K: (default: 20)
parser.add_argument("--K", type=int, default=context_length)
## Embedding dimension: (default: 512)
parser.add_argument("--embed_dim", type=int, default=512)
## Number of Layers: (default: 4)
parser.add_argument("--n_layer", type=int, default=6)
## Number of Attention Heads: (default: 4)
parser.add_argument("--n_head", type=int, default=8)
## Nonlinearity function:
parser.add_argument("--activation_function", type=str, default="relu")
## Dropout:
parser.add_argument("--dropout", type=float, default=0.1)
## Evaluating Context Length K:
parser.add_argument("--eval_context_length", type=int, default=context_length)
## Positional embedding: absolute ordering
parser.add_argument("--ordering", type=int, default=1) # 0

# shared evaluation options
# g_eval: (default: 3600)
parser.add_argument("--eval_rtg", type=int, default=3600)
parser.add_argument("--num_eval_episodes", type=int, default=10)

# shared training options
parser.add_argument("--init_temperature", type=float, default=0.1)
## Batch Size: (default: 256)
parser.add_argument("--batch_size", type=int, default=256)
parser.add_argument("--learning_rate", "-lr", type=float, default=3e-4)
parser.add_argument("--weight_decay", "-wd", type=float, default=5e-4)
## Warmup Steps: (default: 10000)
parser.add_argument("--warmup_steps", type=int, default=10000)

# pretraining options
parser.add_argument("--max_pretrain_iters", type=int, default=1)
parser.add_argument("--num_updates_per_pretrain_iter", type=int, default=5000)

# finetuning options
parser.add_argument("--max_online_iters", type=int, default=2000)
parser.add_argument("--online_rtg", type=int, default=7200)
## Number of Online Rollouts: (default: 1)
parser.add_argument("--num_online_rollouts", type=int, default=5)
parser.add_argument("--replay_size", type=int, default=1000)
parser.add_argument("--num_updates_per_online_iter", type=int, default=300)
parser.add_argument("--eval_interval", type=int, default=10)

# environment options
parser.add_argument("--device", type=str, default="cuda")
# Parsed with _str2bool so that "--log_to_tb False" really disables TensorBoard.
parser.add_argument("--log_to_tb", "-w", type=_str2bool, default=True)
parser.add_argument("--save_dir", type=str, default="./exp")
parser.add_argument("--exp_name", type=str, default="default")

# general options
parser.add_argument("--max_episode_len", type=int, default=2000)

# Alternative values for fast debugging (disabled). They cannot simply be
# appended as extra add_argument calls: argparse rejects duplicate option
# strings, so swap them into the declarations above instead.
#   --K 1
#   --embed_dim 4
#   --n_layer 2
#   --n_head 2
#   --eval_context_length 1
#   --eval_rtg 600
#   --num_eval_episodes 2
#   --batch_size 4
#   --warmup_steps 10
#   --num_updates_per_pretrain_iter 500
#   --max_online_iters 500
#   --online_rtg 720

args = parser.parse_args()

In [7]:
class Experiment:
    """Offline pretraining followed by online finetuning of a DecisionTransformer.

    The constructor queries the environment for its spec, loads the offline
    dataset, and builds the model, optimizers and LR scheduler. Calling the
    instance creates the vectorized environments and runs ``pretrain`` and then
    ``online_tuning``, each gated by its ``max_*_iters`` entry in ``variant``.
    """

    def __init__(self, variant):
        """Build every training component from ``variant``.

        Args:
            variant: dict of options (``vars(args)`` in this notebook). It is
                stored as ``self.variant`` and read throughout the class.
        """
        self.state_dim, self.act_dim, self.action_range = self._get_env_spec(variant)
        self.offline_trajs, self.state_mean, self.state_std = self._load_dataset(variant["env"])

        # initialize by offline trajs
        self.replay_buffer = ReplayBuffer(variant["replay_size"], self.offline_trajs)

        self.aug_trajs = []

        self.device = variant.get("device", "cuda")
        self.target_entropy = -self.act_dim
        self.model = DecisionTransformer(
            state_dim=self.state_dim,
            act_dim=self.act_dim,
            action_range=self.action_range,
            max_length=variant["K"],
            eval_context_length=variant["eval_context_length"],
            max_ep_len=variant["max_episode_len"],
            hidden_size=variant["embed_dim"],
            n_layer=variant["n_layer"],
            n_head=variant["n_head"],
            n_inner=4 * variant["embed_dim"],
            activation_function=variant["activation_function"],
            n_positions=1024,
            n_ctx=3*variant["K"],
            resid_pdrop=variant["dropout"],
            attn_pdrop=variant["dropout"],
            stochastic_policy=True,
            ordering=variant["ordering"],
            init_temperature=variant["init_temperature"],
            target_entropy=self.target_entropy,
        ).to(device=self.device)

        self.optimizer = Lamb(
            self.model.parameters(),
            lr=variant["learning_rate"],
            weight_decay=variant["weight_decay"],
            eps=1e-8,
        )
        # Linear warmup to the base LR. Clamp the divisor so warmup_steps <= 0
        # means "no warmup" instead of raising ZeroDivisionError.
        warmup_steps = max(variant["warmup_steps"], 1)
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, lambda steps: min((steps + 1) / warmup_steps, 1)
        )

        self.log_temperature_optimizer = torch.optim.Adam(
            [self.model.log_temperature],
            lr=1e-4,
            betas=[0.9, 0.999],
        )

        # track the training progress and
        # training/evaluation/online performance in all the iterations
        self.pretrain_iter = 0
        self.online_iter = 0
        self.total_transitions_sampled = 0
        self.variant = variant
        self.reward_scale = 1.0 #  if "antmaze" in variant["env"] else 0.02 # almost the same as dividing by 42
        self.logger = Logger(variant)
        # Reference point for the "time/total" metric. __call__ resets it; setting
        # it here lets pretrain/online_tuning also be called directly.
        self.start_time = time.time()

    def _get_env_spec(self, variant):
        """Create one environment to read its dimensions and action bounds.

        Args:
            variant: option dict; it is wrapped in an ``argparse.Namespace`` for
                ``make_pytorch_env`` so that the environment is built from the
                options given to this experiment rather than a module global.

        Returns:
            ``(state_dim, act_dim, action_range)`` where ``action_range`` is
            ``[low + 1e-6, high - 1e-6]`` over the whole action space.
        """
        #####env = gym.make(variant["env"])
        env = make_pytorch_env(argparse.Namespace(**variant))
        try:
            #env.max_step = MAX_EPISODE_LEN
            state_dim = env.observation_space.shape[0]
            act_dim = env.action_space.shape[0]
            #action_range = [-0.999999, 0.999999]

            # Shrink the bounds by 1e-6 on each side so actions stay strictly
            # inside the action space limits.
            action_range = [
                float(env.action_space.low.min()) + 1e-6,
                float(env.action_space.high.max()) - 1e-6,
            ]

            print("action_range: {}".format(action_range))
        finally:
            # Release the probe environment even if reading the spec failed.
            env.close()
        return state_dim, act_dim, action_range

    def _save_model(self, path_prefix, is_pretrain_model=False):
        """Write a checkpoint to ``{path_prefix}/model.pt``.

        The checkpoint holds the model/optimizer/scheduler state dicts, the
        iteration counters, the options, and the numpy/python/torch RNG states.

        Args:
            path_prefix: directory that receives the checkpoint file(s).
            is_pretrain_model: when true, the same checkpoint is also written to
                ``{path_prefix}/pretrain_model.pt``.
        """
        to_save = {
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "scheduler_state_dict": self.scheduler.state_dict(),
            "pretrain_iter": self.pretrain_iter,
            "online_iter": self.online_iter,
            "args": self.variant,
            "total_transitions_sampled": self.total_transitions_sampled,
            "np": np.random.get_state(),
            "python": random.getstate(),
            "pytorch": torch.get_rng_state(),
            "log_temperature_optimizer_state_dict": self.log_temperature_optimizer.state_dict(),
        }

        with open(f"{path_prefix}/model.pt", "wb") as f:
            torch.save(to_save, f)
        print(f"\nModel saved at {path_prefix}/model.pt")

        if is_pretrain_model:
            with open(f"{path_prefix}/pretrain_model.pt", "wb") as f:
                torch.save(to_save, f)
            print(f"Model saved at {path_prefix}/pretrain_model.pt")

    def _load_model(self, path_prefix):
        """Restore training state from ``{path_prefix}/model.pt`` if it exists.

        Restores the state dicts, iteration counters and RNG states written by
        ``_save_model``. Does nothing (silently) when the file is absent.
        """
        if Path(f"{path_prefix}/model.pt").exists():
            with open(f"{path_prefix}/model.pt", "rb") as f:
                checkpoint = torch.load(f)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
            self.log_temperature_optimizer.load_state_dict(
                checkpoint["log_temperature_optimizer_state_dict"]
            )
            self.pretrain_iter = checkpoint["pretrain_iter"]
            self.online_iter = checkpoint["online_iter"]
            self.total_transitions_sampled = checkpoint["total_transitions_sampled"]
            np.random.set_state(checkpoint["np"])
            random.setstate(checkpoint["python"])
            torch.set_rng_state(checkpoint["pytorch"])
            print(f"Model loaded at {path_prefix}/model.pt")

    def _load_dataset(self, env_name):
        """Load offline trajectories from ``./data/{env_name}.pkl``.

        Each trajectory must provide ``"observations"`` and ``"rewards"``.
        Prints summary statistics of the dataset.

        Returns:
            ``(trajectories, state_mean, state_std)``: the selected trajectories
            ordered by ascending return, and the per-dimension mean / std
            (std + 1e-6) of all observations, computed over the full dataset
            before any trajectory is dropped.
        """
        dataset_path = f"./data/{env_name}.pkl"
        # SECURITY: pickle.load executes arbitrary code embedded in the file;
        # only load datasets from a trusted source.
        with open(dataset_path, "rb") as f:
            trajectories = pickle.load(f)

        states, traj_lens, returns = [], [], []
        for path in trajectories:
            states.append(path["observations"])
            traj_lens.append(len(path["observations"]))
            returns.append(path["rewards"].sum())
        traj_lens, returns = np.array(traj_lens), np.array(returns)

        # used for input normalization
        states = np.concatenate(states, axis=0)
        state_mean, state_std = np.mean(states, axis=0), np.std(states, axis=0) + 1e-6
        num_timesteps = sum(traj_lens)

        print("=" * 50)
        print(f"Starting new experiment: {env_name}")
        print(f"{len(traj_lens)} trajectories, {num_timesteps} timesteps found")
        print(f"Average return: {np.mean(returns):.2f}, std: {np.std(returns):.2f}")
        print(f"Max return: {np.max(returns):.2f}, min: {np.min(returns):.2f}")
        print(f"Average length: {np.mean(traj_lens):.2f}, std: {np.std(traj_lens):.2f}")
        print(f"Max length: {np.max(traj_lens):.2f}, min: {np.min(traj_lens):.2f}")
        print("=" * 50)

        # Keep the highest-return trajectories while the running timestep count
        # stays strictly below the dataset total.
        # NOTE(review): because the comparison is strict, the lowest-return
        # trajectory is always dropped when there is more than one trajectory.
        # Behavior kept as-is; confirm whether `<=` was intended.
        sorted_inds = np.argsort(returns)  # lowest to highest
        num_trajectories = 1
        timesteps = traj_lens[sorted_inds[-1]]
        ind = len(trajectories) - 2
        while ind >= 0 and timesteps + traj_lens[sorted_inds[ind]] < num_timesteps:
            timesteps += traj_lens[sorted_inds[ind]]
            num_trajectories += 1
            ind -= 1
        sorted_inds = sorted_inds[-num_trajectories:]
        trajectories = [trajectories[ii] for ii in sorted_inds]

        return trajectories, state_mean, state_std

    def _augment_trajectories(
        self,
        online_envs,
        target_explore,
        n,
        randomized=False,
    ):
        """Roll out the current policy and add the episodes to the replay buffer.

        Args:
            online_envs: vectorized environment; one episode is run per env.
            target_explore: return-to-go target, multiplied by ``reward_scale``.
            n: accepted for interface compatibility; not used here (the number
                of rollouts is ``online_envs.num_envs``).
            randomized: accepted for interface compatibility; not used here.

        Returns:
            dict with the mean return and mean length of the new trajectories.
        """
        max_ep_len = self.variant["max_episode_len"]

        with torch.no_grad():
            # generate init state
            target_return = [target_explore * self.reward_scale] * online_envs.num_envs

            returns, lengths, trajs = vec_evaluate_episode_rtg(
                online_envs,
                self.state_dim,
                self.act_dim,
                self.model,
                max_ep_len=max_ep_len,
                reward_scale=self.reward_scale,
                target_return=target_return,
                mode="normal",
                state_mean=self.state_mean,
                state_std=self.state_std,
                device=self.device,
                use_mean=False,
            )

        self.replay_buffer.add_new_trajs(trajs)
        self.aug_trajs += trajs
        self.total_transitions_sampled += np.sum(lengths)

        return {
            "aug_traj/return": np.mean(returns),
            "aug_traj/length": np.mean(lengths),
        }

    def _build_eval_fns(self, eval_envs):
        """Return the list of evaluation callables bound to ``eval_envs``."""
        return [
            create_vec_eval_episodes_fn(
                vec_env=eval_envs,
                eval_rtg=self.variant["eval_rtg"],
                state_dim=self.state_dim,
                act_dim=self.act_dim,
                state_mean=self.state_mean,
                state_std=self.state_std,
                device=self.device,
                use_mean=True,
                reward_scale=self.reward_scale,
                max_episode_len = self.variant["max_episode_len"],
            )
        ]

    def _build_trainer(self):
        """Return a SequenceTrainer wired to this experiment's model and optimizers."""
        return SequenceTrainer(
            model=self.model,
            optimizer=self.optimizer,
            log_temperature_optimizer=self.log_temperature_optimizer,
            scheduler=self.scheduler,
            device=self.device,
        )

    def _build_dataloader(self, trajectories, num_iters):
        """Return a dataloader over ``trajectories`` for ``num_iters`` updates."""
        return create_dataloader(
            trajectories=trajectories,
            num_iters=num_iters,
            batch_size=self.variant["batch_size"],
            max_len=self.variant["K"],
            state_dim=self.state_dim,
            act_dim=self.act_dim,
            state_mean=self.state_mean,
            state_std=self.state_std,
            reward_scale=self.reward_scale,
            action_range=self.action_range,
            max_episode_len = self.variant["max_episode_len"],
        )

    def pretrain(self, eval_envs, loss_fn):
        """Run offline pretraining on ``self.offline_trajs``.

        Each iteration trains on a fresh dataloader, evaluates, logs metrics and
        saves a checkpoint (also as ``pretrain_model.pt``).

        Args:
            eval_envs: vectorized environment used for evaluation.
            loss_fn: loss callable passed to the trainer.
        """
        print("\n\n\n*** Pretrain ***")
        print("----------------")
        print("eval_envs: {}".format(eval_envs))
        print("loss_fn: {}".format(loss_fn))

        eval_fns = self._build_eval_fns(eval_envs)
        trainer = self._build_trainer()

        writer = (
            SummaryWriter(self.logger.log_path) if self.variant["log_to_tb"] else None
        )
        try:
            while self.pretrain_iter < self.variant["max_pretrain_iters"]:
                # in every iteration, prepare the data loader
                dataloader = self._build_dataloader(
                    self.offline_trajs,
                    self.variant["num_updates_per_pretrain_iter"],
                )

                train_outputs = trainer.train_iteration(
                    loss_fn=loss_fn,
                    dataloader=dataloader,
                )
                eval_outputs, _ = self.evaluate(eval_fns)
                outputs = {"time/total": time.time() - self.start_time}
                outputs.update(train_outputs)
                outputs.update(eval_outputs)
                self.logger.log_metrics(
                    outputs,
                    iter_num=self.pretrain_iter,
                    total_transitions_sampled=self.total_transitions_sampled,
                    writer=writer,
                )

                self._save_model(
                    path_prefix=self.logger.log_path,
                    is_pretrain_model=True,
                )

                self.pretrain_iter += 1
        finally:
            # Flush and release the TensorBoard event file, even on failure.
            if writer is not None:
                writer.close()

    def evaluate(self, eval_fns):
        """Run every evaluation callable on the model in eval mode.

        Args:
            eval_fns: callables taking the model and returning a metrics dict.

        Returns:
            ``(outputs, eval_reward)``: the merged metrics (plus
            ``"time/evaluation"``) and the ``"evaluation/return_mean_gm"`` entry.

        Raises:
            KeyError: if no callable reported ``"evaluation/return_mean_gm"``.
        """
        eval_start = time.time()
        self.model.eval()
        outputs = {}
        for eval_fn in eval_fns:
            outputs.update(eval_fn(self.model))
        outputs["time/evaluation"] = time.time() - eval_start

        eval_reward = outputs["evaluation/return_mean_gm"]
        return outputs, eval_reward

    def online_tuning(self, online_envs, eval_envs, loss_fn):
        """Run online finetuning.

        Each iteration collects new rollouts into the replay buffer, trains on
        the buffer, evaluates every ``eval_interval`` iterations (and on the
        last one), logs metrics and saves a checkpoint.

        Args:
            online_envs: vectorized environment used to collect rollouts.
            eval_envs: vectorized environment used for evaluation.
            loss_fn: loss callable passed to the trainer.
        """
        print("\n\n\n*** Online Finetuning ***")

        trainer = self._build_trainer()
        eval_fns = self._build_eval_fns(eval_envs)
        writer = (
            SummaryWriter(self.logger.log_path) if self.variant["log_to_tb"] else None
        )
        try:
            while self.online_iter < self.variant["max_online_iters"]:

                outputs = {}
                augment_outputs = self._augment_trajectories(
                    online_envs,
                    self.variant["online_rtg"],
                    n=self.variant["num_online_rollouts"],
                )
                outputs.update(augment_outputs)

                dataloader = self._build_dataloader(
                    self.replay_buffer.trajectories,
                    self.variant["num_updates_per_online_iter"],
                )

                # finetuning
                is_last_iter = self.online_iter == self.variant["max_online_iters"] - 1
                evaluation = (
                    (self.online_iter + 1) % self.variant["eval_interval"] == 0
                    or is_last_iter
                )

                train_outputs = trainer.train_iteration(
                    loss_fn=loss_fn,
                    dataloader=dataloader,
                )
                outputs.update(train_outputs)

                if evaluation:
                    eval_outputs, _ = self.evaluate(eval_fns)
                    outputs.update(eval_outputs)

                outputs["time/total"] = time.time() - self.start_time

                # log the metrics
                self.logger.log_metrics(
                    outputs,
                    iter_num=self.pretrain_iter + self.online_iter,
                    total_transitions_sampled=self.total_transitions_sampled,
                    writer=writer,
                )

                self._save_model(
                    path_prefix=self.logger.log_path,
                    is_pretrain_model=False,
                )

                self.online_iter += 1
        finally:
            # Flush and release the TensorBoard event file, even on failure.
            if writer is not None:
                writer.close()

    def __call__(self):
        """Seed the RNGs, build the environments and run both training phases.

        Pretraining runs when ``max_pretrain_iters`` is non-zero and online
        finetuning when ``max_online_iters`` is non-zero. All vectorized
        environments are closed on exit, including when a phase raises.
        """
        # Seed and build environments from this experiment's own options rather
        # than from a module-level global.
        utils.set_seed_everywhere(self.variant["seed"])
        env_args = argparse.Namespace(**self.variant)

        # Kept from the original code; presumably imported for a side effect
        # (environment registration) -- TODO confirm it is still needed.
        import d4rl

        def loss_fn(
            a_hat_dist,     # action_preds
            a,              # action_target
            attention_mask, # padding_mask
            entropy_reg,    # self.model.temperature().detach()
        ):
            """Return ``(loss, nll, entropy)`` for a batch of predicted actions."""
            # a_hat is a SquashedNormal Distribution
            # Only positions with attention_mask > 0 contribute to the likelihood.
            log_likelihood = a_hat_dist.log_likelihood(a)[attention_mask > 0].mean()

            entropy = a_hat_dist.entropy().mean()
            loss = -(log_likelihood + entropy_reg * entropy)

            return (
                loss,
                -log_likelihood,
                entropy,
            )

        def get_env_builder(seed, env_name, target_goal=None):
            """Return a zero-argument factory that builds one seeded environment.

            ``env_name`` is currently unused: the environment always comes from
            ``make_pytorch_env``. The factory only closes over plain values
            (never ``self``), so it stays cheap to hand to worker processes.
            """
            def make_env_fn():
                import d4rl

                #####env = gym.make(env_name)
                env = make_pytorch_env(env_args)
                #env.max_step = MAX_EPISODE_LEN
                env.seed(seed)
                # Seeding of wrapped envs and of the action/observation spaces
                # was disabled in the original code and is intentionally omitted.

                if target_goal:
                    env.set_target_goal(target_goal)
                    print(f"Set the target goal to be {env.target_goal}")
                return env

            return make_env_fn

        print("\n\nMaking Eval Env.....")
        env_name = self.variant["env"]
        if "antmaze" in env_name:
            env = gym.make(env_name)
            target_goal = env.target_goal
            env.close()
            print(f"Generated the fixed target goal: {target_goal}")
        else:
            target_goal = None
        eval_envs = SubprocVecEnv(
            [
                get_env_builder(i, env_name=env_name, target_goal=target_goal)
                for i in range(self.variant["num_eval_episodes"])
            ]
        )

        try:
            self.start_time = time.time()

            if self.variant["max_pretrain_iters"]:
                self.pretrain(eval_envs, loss_fn)

            if self.variant["max_online_iters"]:
                print("\n\nMaking Online Env.....")
                # Seeds are offset by 100 so online envs differ from eval envs.
                online_envs = SubprocVecEnv(
                    [
                        get_env_builder(i + 100, env_name=env_name, target_goal=target_goal)
                        for i in range(self.variant["num_online_rollouts"])
                    ]
                )
                try:
                    self.online_tuning(online_envs, eval_envs, loss_fn)
                finally:
                    online_envs.close()
        finally:
            # Always shut the evaluation workers down, even if training raised.
            eval_envs.close()

In [None]:
# Seed the random number generators before anything is constructed.
utils.set_seed_everywhere(args.seed)

# The experiment takes its options as a plain dict, hence vars(args).
experiment = Experiment(variant=vars(args))

# Separator line, then run the experiment via its __call__ entry point.
print("=" * 50)
experiment.__call__()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


action_range: [-0.999999, 0.999999]
Starting new experiment: drone_dataset
1254 trajectories, 1971662 timesteps found
Average return: 36.15, std: 8.58
Max return: 51.14, min: 12.39
Average length: 1572.30, std: 325.37
Max length: 2000.00, min: 920.00
{'n_layer': 6, 'n_head': 8, 'n_inner': 2048, 'activation_function': 'relu', 'n_positions': 1024, 'n_ctx': 240, 'resid_pdrop': 0.1, 'attn_pdrop': 0.1}
Experiment log path: ./exp/2023.04.20/200320-default


Making Eval Env.....


pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17





*** Pretrain ***
----------------
eval_envs: <stable_baselines3.common.vec_env.subproc_vec_env.SubprocVecEnv object at 0x7fe2dd8aee80>
loss_fn: <function Experiment.__call__.<locals>.loss_fn at 0x7fe2d6db53a0>


pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17


Iteration 0
time/total: 3470.549707174301
time/training: 3451.707262277603
training/train_loss_mean: 39.42650579595465
training/train_loss_std: 258.26410359183666
training/nll: -6.90283203125
training/entropy: -5.044419765472412
training/temp_value: 0.15841142537743239
evaluation/return_mean_gm: -193.71528418814722
evaluation/return_std_gm: 4.090017000257461
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.768984079360962

Model saved at ./exp/2023.04.20/200320-default/model.pt
Model saved at ./exp/2023.04.20/200320-default/pretrain_model.pt


Making Online Env.....


pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17





*** Online Finetuning ***
Iteration 1
aug_traj/return: -111.15154629630335
aug_traj/length: 2000.0
time/training: 208.64312148094177
training/train_loss_mean: -6.3462051067512775
training/train_loss_std: 1.0105364698989878
training/nll: -7.87704610824585
training/entropy: -5.199371814727783
training/temp_value: 0.16433928695637032
time/total: 3696.3863785266876

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 2
aug_traj/return: -214.54928026151705
aug_traj/length: 2000.0
time/training: 208.31338596343994
training/train_loss_mean: -6.783432297770521
training/train_loss_std: 0.4746278577846673
training/nll: -7.396350860595703
training/entropy: -5.507862091064453
training/temp_value: 0.17079253252845866
time/total: 3916.5691022872925

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 3
aug_traj/return: -134.5810520880347
aug_traj/length: 2000.0
time/training: 208.13588523864746
training/train_loss_mean: -6.880070981290195
training/train_loss_std: 0.538


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 18
aug_traj/return: -118.01569762008782
aug_traj/length: 2000.0
time/training: 208.31742429733276
training/train_loss_mean: -9.601267276394832
training/train_loss_std: 0.264525932199132
training/nll: -11.846134185791016
training/entropy: -7.773285865783691
training/temp_value: 0.3080628165322683
time/total: 7460.385885953903

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 19
aug_traj/return: -86.31831553013475
aug_traj/length: 2000.0
time/training: 208.34132385253906
training/train_loss_mean: -9.583750876904126
training/train_loss_std: 0.24822934873665287
training/nll: -12.638765335083008
training/entropy: -8.361311912536621
training/temp_value: 0.31937983434897155
time/total: 7680.400046110153

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 20
aug_traj/return: -62.50661658510845
aug_traj/length: 2000.0
time/training: 208.52430033683777
training/train_loss_mean: -9.617453518694177
t


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 35
aug_traj/return: -51.464674228841204
aug_traj/length: 2000.0
time/training: 208.39018654823303
training/train_loss_mean: -7.979401332503548
training/train_loss_std: 0.2179013892185591
training/nll: -11.766280174255371
training/entropy: -7.836762428283691
training/temp_value: 0.5454017105445358
time/total: 11241.819692373276

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 36
aug_traj/return: -108.31390344645756
aug_traj/length: 2000.0
time/training: 208.2200026512146
training/train_loss_mean: -7.833145126542664
training/train_loss_std: 0.3276229619157932
training/nll: -12.081859588623047
training/entropy: -7.698834419250488
training/temp_value: 0.5634651110727448
time/total: 11461.724347352982

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 37
aug_traj/return: -37.745754656052554
aug_traj/length: 2000.0
time/training: 208.24295210838318
training/train_loss_mean: -7.643012709360430


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 52
aug_traj/return: -150.85558383517372
aug_traj/length: 2000.0
time/training: 208.29891777038574
training/train_loss_mean: -4.824272695781549
training/train_loss_std: 0.19751149645532767
training/nll: -10.385469436645508
training/entropy: -6.105861663818359
training/temp_value: 0.9337468349026433
time/total: 15021.0561709404

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 53
aug_traj/return: -176.5941176470649
aug_traj/length: 2000.0
time/training: 208.51244711875916
training/train_loss_mean: -4.526706569945193
training/train_loss_std: 1.4747475095354563
training/nll: -10.452630043029785
training/entropy: -6.312870979309082
training/temp_value: 0.9615212194715013
time/total: 15241.667867660522

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 54
aug_traj/return: -107.9800135411271
aug_traj/length: 2000.0
time/training: 208.22095823287964
training/train_loss_mean: -4.399972714789603
t


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 69
aug_traj/return: -76.43034645358617
aug_traj/length: 2000.0
time/training: 208.14978981018066
training/train_loss_mean: -3.7255001207921743
training/train_loss_std: 0.17612291527592863
training/nll: -7.039045810699463
training/entropy: -3.0284829139709473
training/temp_value: 1.0279793274991018
time/total: 18781.456505537033

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 70
aug_traj/return: -64.61491324558833
aug_traj/length: 2000.0
time/training: 208.04783964157104
training/train_loss_mean: -3.6899337750174004
training/train_loss_std: 0.18607449355368488
training/nll: -7.211235523223877
training/entropy: -3.4172589778900146
training/temp_value: 1.0255866573973127
evaluation/return_mean_gm: -132.00650684447672
evaluation/return_std_gm: 85.28648847373597
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.835702180862427
time/total: 19019.906229257584

Model saved at


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 86
aug_traj/return: -30.40314330967244
aug_traj/length: 2000.0
time/training: 208.11403512954712
training/train_loss_mean: -3.214338398671421
training/train_loss_std: 0.179724746654286
training/nll: -6.129811763763428
training/entropy: -2.937971830368042
training/temp_value: 1.0208469797856716
time/total: 22559.35058569908

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 87
aug_traj/return: -40.79553847105543
aug_traj/length: 2000.0
time/training: 208.1664469242096
training/train_loss_mean: -3.205154851516627
training/train_loss_std: 0.18551481867706326
training/nll: -6.149548053741455
training/entropy: -3.043086290359497
training/temp_value: 1.0209273350563601
time/total: 22779.263051509857

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 88
aug_traj/return: -33.69110139543364
aug_traj/length: 2000.0
time/training: 208.29453468322754
training/train_loss_mean: -3.152758843859991
train


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 103
aug_traj/return: -40.277345730193296
aug_traj/length: 2000.0
time/training: 208.14998769760132
training/train_loss_mean: -2.784504084755214
training/train_loss_std: 0.1792570325025316
training/nll: -5.749133586883545
training/entropy: -3.005704879760742
training/temp_value: 0.9939028708857416
time/total: 26337.10777282715

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 104
aug_traj/return: -24.503662773315522
aug_traj/length: 2000.0
time/training: 208.1190230846405
training/train_loss_mean: -2.784084091882951
training/train_loss_std: 0.20403535467766254
training/nll: -5.970855236053467
training/entropy: -3.162177324295044
training/temp_value: 0.9903316546884892
time/total: 26556.9585647583

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 105
aug_traj/return: -29.960139746786666
aug_traj/length: 2000.0
time/training: 208.38093328475952
training/train_loss_mean: -2.755198656314572



Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 120
aug_traj/return: -39.00709875624746
aug_traj/length: 2000.0
time/training: 207.94438672065735
training/train_loss_mean: -2.5854636766254924
training/train_loss_std: 0.18860950266863863
training/nll: -5.327663421630859
training/entropy: -2.936842679977417
training/temp_value: 0.8976278623572899
evaluation/return_mean_gm: -160.70725121162428
evaluation/return_std_gm: 8.74940348040229
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.779791593551636
time/total: 30114.624593257904

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 121
aug_traj/return: -36.06583968272405
aug_traj/length: 2000.0
time/training: 208.18972539901733
training/train_loss_mean: -2.5961206668801733
training/train_loss_std: 0.20440720165661852
training/nll: -5.387688636779785
training/entropy: -3.0031650066375732
training/temp_value: 0.8898264637977449
time/total: 30334.572461605072

Model saved at


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 137
aug_traj/return: -42.777164451205245
aug_traj/length: 2000.0
time/training: 207.9924614429474
training/train_loss_mean: -2.620472377040293
training/train_loss_std: 0.24610229626759786
training/nll: -4.782012462615967
training/entropy: -2.9322779178619385
training/temp_value: 0.7134969734611162
time/total: 33874.020349025726

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 138
aug_traj/return: -77.93481466990644
aug_traj/length: 2000.0
time/training: 208.03930401802063
training/train_loss_mean: -2.6056679493232635
training/train_loss_std: 0.3935384126536085
training/nll: -4.666041851043701
training/entropy: -2.907594680786133
training/temp_value: 0.7026683075905038
time/total: 34094.020223379135

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 139
aug_traj/return: -68.84206404999566
aug_traj/length: 2000.0
time/training: 207.98947548866272
training/train_loss_mean: -2.5234253370657


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 154
aug_traj/return: -45.165912978398765
aug_traj/length: 2000.0
time/training: 207.95109009742737
training/train_loss_mean: -2.4168920380421364
training/train_loss_std: 2.82596947845978
training/nll: -4.346472263336182
training/entropy: -2.806896924972534
training/temp_value: 0.5211992502701999
time/total: 37650.91050887108

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 155
aug_traj/return: -22.985680352694786
aug_traj/length: 2000.0
time/training: 208.12851810455322
training/train_loss_mean: -2.572149136266953
training/train_loss_std: 1.3083927926720689
training/nll: -4.154379367828369
training/entropy: -2.7914836406707764
training/temp_value: 0.5105130256090679
time/total: 37870.749512434006

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 156
aug_traj/return: -31.54955470080635
aug_traj/length: 2000.0
time/training: 207.8673496246338
training/train_loss_mean: -2.6166994328474313


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 171
aug_traj/return: -38.21051329744673
aug_traj/length: 2000.0
time/training: 208.07430410385132
training/train_loss_mean: -2.556755430436954
training/train_loss_std: 0.2686936292539987
training/nll: -3.9274744987487793
training/entropy: -2.9106404781341553
training/temp_value: 0.3544458537545404
time/total: 41425.77586889267

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 172
aug_traj/return: -23.210801322404006
aug_traj/length: 2000.0
time/training: 208.00737643241882
training/train_loss_mean: -2.4656402050436523
training/train_loss_std: 1.6471150544906947
training/nll: -2.8571279048919678
training/entropy: -2.127262592315674
training/temp_value: 0.345665063844192
time/total: 41645.49858689308

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 173
aug_traj/return: -30.57233334618062
aug_traj/length: 2000.0
time/training: 208.17841720581055
training/train_loss_mean: -2.33326531727488


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 188
aug_traj/return: -31.19622611625283
aug_traj/length: 2000.0
time/training: 208.1773271560669
training/train_loss_mean: -2.3817279659252866
training/train_loss_std: 0.29516872744925976
training/nll: -3.1376614570617676
training/entropy: -2.659986972808838
training/temp_value: 0.22764141052607637
time/total: 45181.50682210922

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 189
aug_traj/return: -26.06391881923248
aug_traj/length: 2000.0
time/training: 207.95023465156555
training/train_loss_mean: -2.319149106710814
training/train_loss_std: 1.2109439241289917
training/nll: -2.7648048400878906
training/entropy: -2.307812452316284
training/temp_value: 0.22183911440801418
time/total: 45401.18835377693

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 190
aug_traj/return: -30.25568587527938
aug_traj/length: 2000.0
time/training: 208.13731241226196
training/train_loss_mean: -2.3426675023713


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 205
aug_traj/return: -112.76459016144202
aug_traj/length: 2000.0
time/training: 208.0212380886078
training/train_loss_mean: -2.1239752923181783
training/train_loss_std: 0.22168823623293535
training/nll: -2.4505486488342285
training/entropy: -2.143927574157715
training/temp_value: 0.14341868460448357
time/total: 48957.46051669121

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 206
aug_traj/return: -35.62451833965987
aug_traj/length: 2000.0
time/training: 207.93492317199707
training/train_loss_mean: -2.137661616001456
training/train_loss_std: 0.24933702385167358
training/nll: -2.3256051540374756
training/entropy: -1.9642105102539062
training/temp_value: 0.13957089747155693
time/total: 49177.1202378273

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 207
aug_traj/return: -79.00063295106283
aug_traj/length: 2000.0
time/training: 207.94421100616455
training/train_loss_mean: -2.12378894341


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 222
aug_traj/return: -83.78115684330005
aug_traj/length: 2000.0
time/training: 207.97745513916016
training/train_loss_mean: -1.944670243036398
training/train_loss_std: 0.242741161092476
training/nll: -2.7086477279663086
training/entropy: -2.479175567626953
training/temp_value: 0.08936417528417195
time/total: 52732.60067510605

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 223
aug_traj/return: -74.20159736635088
aug_traj/length: 2000.0
time/training: 208.16554260253906
training/train_loss_mean: -1.9406172315635115
training/train_loss_std: 0.2707084237337664
training/nll: -2.1106560230255127
training/entropy: -1.941340684890747
training/temp_value: 0.08696357264000094
time/total: 52952.515320539474

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 224
aug_traj/return: -71.85192891531717
aug_traj/length: 2000.0
time/training: 207.98298859596252
training/train_loss_mean: -1.9146776281615


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 239
aug_traj/return: -108.98665265576983
aug_traj/length: 2000.0
time/training: 208.333190202713
training/train_loss_mean: -1.8081241612607368
training/train_loss_std: 0.26651614076319835
training/nll: -1.8284260034561157
training/entropy: -1.667432427406311
training/temp_value: 0.056010843192240894
time/total: 56491.89881849289

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 240
aug_traj/return: -165.2924204484582
aug_traj/length: 2000.0
time/training: 208.0305209159851
training/train_loss_mean: -1.8259192595517515
training/train_loss_std: 0.2549262953806329
training/nll: -1.645906686782837
training/entropy: -1.422126054763794
training/temp_value: 0.0545198000543352
evaluation/return_mean_gm: -209.19040211597866
evaluation/return_std_gm: 0.6536052914061147
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.847212314605713
time/total: 56730.538301706314

Model saved at


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 256
aug_traj/return: -184.9493922420979
aug_traj/length: 2000.0
time/training: 208.07009100914001
training/train_loss_mean: -1.8435950966663532
training/train_loss_std: 0.2625821857691842
training/nll: -1.8870296478271484
training/entropy: -1.6946651935577393
training/temp_value: 0.03542383645313765
time/total: 60269.4414536953

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 257
aug_traj/return: -198.66528979432323
aug_traj/length: 2000.0
time/training: 208.38549757003784
training/train_loss_mean: -1.8433659821949873
training/train_loss_std: 0.2630725828035796
training/nll: -2.2088539600372314
training/entropy: -2.0420947074890137
training/temp_value: 0.03451082327933826
time/total: 60489.447944164276

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 258
aug_traj/return: -210.4032450409955
aug_traj/length: 2000.0
time/training: 208.09607076644897
training/train_loss_mean: -1.909524706


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 273
aug_traj/return: -182.7059605300368
aug_traj/length: 2000.0
time/training: 207.25529670715332
training/train_loss_mean: -2.542494648386288
training/train_loss_std: 0.2556597258825308
training/nll: -2.2931747436523438
training/entropy: -2.1214823722839355
training/temp_value: 0.02395704061447113
time/total: 64039.12169313431

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 274
aug_traj/return: -144.44837181560155
aug_traj/length: 2000.0
time/training: 207.30873250961304
training/train_loss_mean: -2.5750403349697284
training/train_loss_std: 0.2596733102690632
training/nll: -3.0818488597869873
training/entropy: -2.826934576034546
training/temp_value: 0.023484128336949406
time/total: 64258.18871188164

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 275
aug_traj/return: -115.33785481324063
aug_traj/length: 2000.0
time/training: 207.3921709060669
training/train_loss_mean: -2.5628682202


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 290
aug_traj/return: -194.17859470706077
aug_traj/length: 2000.0
time/training: 207.11393547058105
training/train_loss_mean: -3.269398813573298
training/train_loss_std: 0.31177889697283345
training/nll: -3.001699209213257
training/entropy: -2.7027206420898438
training/temp_value: 0.02043786495703201
evaluation/return_mean_gm: -203.99030021192434
evaluation/return_std_gm: 0.8007283418126657
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.75511074066162
time/total: 67801.99566602707

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 291
aug_traj/return: -194.83873549472688
aug_traj/length: 2000.0
time/training: 207.34576416015625
training/train_loss_mean: -3.3695036173230664
training/train_loss_std: 0.29932040024129203
training/nll: -2.5405492782592773
training/entropy: -3.0745697021484375
training/temp_value: 0.020837975205878663
time/total: 68020.92804765701

Model sav


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 307
aug_traj/return: -202.58492740056514
aug_traj/length: 2000.0
time/training: 207.0681393146515
training/train_loss_mean: -4.009260273204949
training/train_loss_std: 0.28672104680091337
training/nll: -4.126654148101807
training/entropy: -3.7252907752990723
training/temp_value: 0.03496411338662347
time/total: 71544.57201194763

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 308
aug_traj/return: -200.22805308109793
aug_traj/length: 2000.0
time/training: 207.29017901420593
training/train_loss_mean: -4.028852101382877
training/train_loss_std: 0.28691019677493335
training/nll: -4.494271755218506
training/entropy: -4.097317218780518
training/temp_value: 0.036161364007058855
time/total: 71763.14838051796

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 309
aug_traj/return: -205.3777837370867
aug_traj/length: 2000.0
time/training: 207.16392135620117
training/train_loss_mean: -4.07533087583


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 324
aug_traj/return: -204.94223633062856
aug_traj/length: 2000.0
time/training: 207.08114218711853
training/train_loss_mean: -4.484529124566581
training/train_loss_std: 0.3005527899244266
training/nll: -5.088303089141846
training/entropy: -4.610504150390625
training/temp_value: 0.06254462800691218
time/total: 75303.65174484253

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 325
aug_traj/return: -198.99658607816895
aug_traj/length: 2000.0
time/training: 207.1981062889099
training/train_loss_mean: -4.492793680294396
training/train_loss_std: 0.27747209434234726
training/nll: -4.813474655151367
training/entropy: -4.327358722686768
training/temp_value: 0.06465120764849042
time/total: 75522.39449977875

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 326
aug_traj/return: -192.57316525476452
aug_traj/length: 2000.0
time/training: 207.01507592201233
training/train_loss_mean: -4.5164593914070


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 341
aug_traj/return: -196.6705976332653
aug_traj/length: 2000.0
time/training: 207.44989895820618
training/train_loss_mean: -4.560467404736794
training/train_loss_std: 0.23058737352653216
training/nll: -5.281213760375977
training/entropy: -4.351142883300781
training/temp_value: 0.10876193258994282
time/total: 79064.44997310638

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 342
aug_traj/return: -154.9983658865313
aug_traj/length: 2000.0
time/training: 207.51289248466492
training/train_loss_mean: -4.551261380218138
training/train_loss_std: 0.2545142016002629
training/nll: -4.658951282501221
training/entropy: -4.025845050811768
training/temp_value: 0.11225518768870853
time/total: 79283.77618813515

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 343
aug_traj/return: -194.3528412311959
aug_traj/length: 2000.0
time/training: 207.83884000778198
training/train_loss_mean: -4.550643810172612


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 358
aug_traj/return: -175.14337041661565
aug_traj/length: 2000.0
time/training: 207.83783078193665
training/train_loss_mean: -4.370854076915454
training/train_loss_std: 0.21142011361952678
training/nll: -4.973776340484619
training/entropy: -3.883669376373291
training/temp_value: 0.18543622847746102
time/total: 82816.159324646

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 359
aug_traj/return: -196.98307451504928
aug_traj/length: 2000.0
time/training: 207.80090618133545
training/train_loss_mean: -4.363092426483779
training/train_loss_std: 0.22283552372336532
training/nll: -5.24754524230957
training/entropy: -3.909151792526245
training/temp_value: 0.19109180531140962
time/total: 83035.84431910515

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 360
aug_traj/return: -185.43024872527198
aug_traj/length: 2000.0
time/training: 208.06812405586243
training/train_loss_mean: -4.35249304702723


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 375
aug_traj/return: -171.33416023171154
aug_traj/length: 2000.0
time/training: 207.92985153198242
training/train_loss_mean: -4.055317601496102
training/train_loss_std: 0.16772712057349687
training/nll: -5.067444801330566
training/entropy: -3.6401703357696533
training/temp_value: 0.3093233844487893
time/total: 86587.94824624062

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 376
aug_traj/return: -186.03952069544
aug_traj/length: 2000.0
time/training: 207.72229743003845
training/train_loss_mean: -4.003634719099152
training/train_loss_std: 0.16979956048073036
training/nll: -4.920069694519043
training/entropy: -3.4585461616516113
training/temp_value: 0.31828373963672585
time/total: 86807.19810962677

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 377
aug_traj/return: -186.39803116796554
aug_traj/length: 2000.0
time/training: 208.0562138557434
training/train_loss_mean: -4.00170829311805


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 392
aug_traj/return: -138.00632569664805
aug_traj/length: 2000.0
time/training: 207.892098903656
training/train_loss_mean: -3.4655463511471694
training/train_loss_std: 0.1418907195876361
training/nll: -5.266205310821533
training/entropy: -3.271595001220703
training/temp_value: 0.47215424288307317
time/total: 90360.01955294609

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 393
aug_traj/return: -143.07357859336756
aug_traj/length: 2000.0
time/training: 208.02609658241272
training/train_loss_mean: -3.423439683091026
training/train_loss_std: 0.13459715105903586
training/nll: -5.0674896240234375
training/entropy: -3.2396652698516846
training/temp_value: 0.4801579044717653
time/total: 90579.91225361824

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 394
aug_traj/return: -195.74911827920042
aug_traj/length: 2000.0
time/training: 208.03583693504333
training/train_loss_mean: -3.387679739386


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 409
aug_traj/return: -59.57085379013429
aug_traj/length: 2000.0
time/training: 207.90196466445923
training/train_loss_mean: -3.239703232572205
training/train_loss_std: 0.1401442769618463
training/nll: -4.940579414367676
training/entropy: -3.0623323917388916
training/temp_value: 0.520371357407043
time/total: 94113.54802775383

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 410
aug_traj/return: -64.39453825099794
aug_traj/length: 2000.0
time/training: 207.84777808189392
training/train_loss_mean: -3.228902870860338
training/train_loss_std: 0.130287005317676
training/nll: -4.653579235076904
training/entropy: -2.7857730388641357
training/temp_value: 0.5167833192688274
evaluation/return_mean_gm: -227.59302114520477
evaluation/return_std_gm: 0.46104841709309297
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.783382654190063
time/total: 94352.07037520409

Model saved at ./e


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 426
aug_traj/return: -68.91871539879715
aug_traj/length: 2000.0
time/training: 207.71924257278442
training/train_loss_mean: -3.3168814183985647
training/train_loss_std: 0.12957198629856573
training/nll: -4.639056205749512
training/entropy: -2.828533887863159
training/temp_value: 0.45011426942277016
time/total: 97884.90870571136

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 427
aug_traj/return: -48.84790884611595
aug_traj/length: 2000.0
time/training: 207.80049562454224
training/train_loss_mean: -3.326446265592379
training/train_loss_std: 0.13852865930943156
training/nll: -4.655713081359863
training/entropy: -2.9783949851989746
training/temp_value: 0.44525491055145
time/total: 98104.61107993126

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 428
aug_traj/return: -76.73089275746865
aug_traj/length: 2000.0
time/training: 207.93964743614197
training/train_loss_mean: -3.336246066418061


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 443
aug_traj/return: -128.0687846507104
aug_traj/length: 2000.0
time/training: 208.05920028686523
training/train_loss_mean: -3.4022691210126217
training/train_loss_std: 0.13878405571112
training/nll: -4.295102119445801
training/entropy: -2.7261714935302734
training/temp_value: 0.36777216724204426
time/total: 101658.3022351265

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 444
aug_traj/return: -172.55609399197974
aug_traj/length: 2000.0
time/training: 207.75860261917114
training/train_loss_mean: -3.4400258761191584
training/train_loss_std: 0.14209582378240498
training/nll: -4.892227649688721
training/entropy: -3.2226154804229736
training/temp_value: 0.3657085269421864
time/total: 101877.54157614708

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 445
aug_traj/return: -134.32319160662973
aug_traj/length: 2000.0
time/training: 207.83244228363037
training/train_loss_mean: -3.44397454973


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 460
aug_traj/return: -151.84242350647065
aug_traj/length: 2000.0
time/training: 208.1049497127533
training/train_loss_mean: -3.393120957646385
training/train_loss_std: 0.14635449377390852
training/nll: -4.313610076904297
training/entropy: -2.83512806892395
training/temp_value: 0.3014864964555165
evaluation/return_mean_gm: -161.5321541553225
evaluation/return_std_gm: 3.039220449800858
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.72848129272461
time/total: 105430.4249560833

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 461
aug_traj/return: -190.09856959160274
aug_traj/length: 2000.0
time/training: 207.91252088546753
training/train_loss_mean: -3.3840497625545196
training/train_loss_std: 0.13865994160681708
training/nll: -4.063591003417969
training/entropy: -2.784290075302124
training/temp_value: 0.2969466325276668
time/total: 105650.1400988102

Model saved at ./ex


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 477
aug_traj/return: -165.2952197101811
aug_traj/length: 2000.0
time/training: 207.97009468078613
training/train_loss_mean: -3.3391601211669135
training/train_loss_std: 0.13740631942142975
training/nll: -3.788797616958618
training/entropy: -2.797940492630005
training/temp_value: 0.236446468082348
time/total: 109183.8126077652

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 478
aug_traj/return: -188.74939424823
aug_traj/length: 2000.0
time/training: 207.955575466156
training/train_loss_mean: -3.3490264707857165
training/train_loss_std: 0.14589765114450257
training/nll: -3.8107335567474365
training/entropy: -2.734907627105713
training/temp_value: 0.23293155260535076
time/total: 109403.50629878044

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 479
aug_traj/return: -147.01218091924488
aug_traj/length: 2000.0
time/training: 207.90837836265564
training/train_loss_mean: -3.343450949159617


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 494
aug_traj/return: -160.27135422824722
aug_traj/length: 2000.0
time/training: 207.7576766014099
training/train_loss_mean: -3.2106203135805145
training/train_loss_std: 0.1348827620190576
training/nll: -3.4763708114624023
training/entropy: -2.6039111614227295
training/temp_value: 0.173807372046945
time/total: 112954.84786367416

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 495
aug_traj/return: -175.25228081077086
aug_traj/length: 2000.0
time/training: 207.7757499217987
training/train_loss_mean: -3.209043200613379
training/train_loss_std: 0.13803297611237744
training/nll: -3.5127577781677246
training/entropy: -2.817850112915039
training/temp_value: 0.17007293540359716
time/total: 113174.32207870483

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 496
aug_traj/return: -173.18023376687728
aug_traj/length: 2000.0
time/training: 207.94839882850647
training/train_loss_mean: -3.2020713185


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 511
aug_traj/return: -183.80583027540416
aug_traj/length: 2000.0
time/training: 207.81788778305054
training/train_loss_mean: -3.0764748083052034
training/train_loss_std: 0.1194788740454381
training/nll: -3.3981544971466064
training/entropy: -2.8711955547332764
training/temp_value: 0.11685898042727062
time/total: 116726.27221679688

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 512
aug_traj/return: -190.8003366823581
aug_traj/length: 2000.0
time/training: 207.80988764762878
training/train_loss_mean: -3.0484603436318594
training/train_loss_std: 0.11800460939185607
training/nll: -3.387600898742676
training/entropy: -2.80977725982666
training/temp_value: 0.11370867972772056
time/total: 116945.83530807495

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 513
aug_traj/return: -188.0907543883015
aug_traj/length: 2000.0
time/training: 207.81621432304382
training/train_loss_mean: -3.040900062


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 528
aug_traj/return: -205.26946648813617
aug_traj/length: 2000.0
time/training: 207.84826278686523
training/train_loss_mean: -2.922103953138846
training/train_loss_std: 0.1049693587575926
training/nll: -2.9512076377868652
training/entropy: -2.5866005420684814
training/temp_value: 0.07499066070333178
time/total: 120477.62912344933

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 529
aug_traj/return: -187.7039334898923
aug_traj/length: 2000.0
time/training: 207.91580533981323
training/train_loss_mean: -2.9110633627917295
training/train_loss_std: 0.1046180640397066
training/nll: -2.9582178592681885
training/entropy: -2.6488091945648193
training/temp_value: 0.073003696659188
time/total: 120697.2890393734

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 530
aug_traj/return: -199.2101574801623
aug_traj/length: 2000.0
time/training: 207.82227730751038
training/train_loss_mean: -2.91005345344


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 545
aug_traj/return: -204.71541307268942
aug_traj/length: 2000.0
time/training: 207.82284426689148
training/train_loss_mean: -2.8365657828389588
training/train_loss_std: 0.10107504204448638
training/nll: -3.1629562377929688
training/entropy: -2.754809617996216
training/temp_value: 0.04766441370392369
time/total: 124246.76486444473

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 546
aug_traj/return: -193.03194933059495
aug_traj/length: 2000.0
time/training: 208.11868143081665
training/train_loss_mean: -2.8298893530216005
training/train_loss_std: 0.09736497704418494
training/nll: -3.0130176544189453
training/entropy: -2.7839596271514893
training/temp_value: 0.04640559734838519
time/total: 124466.80476093292

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 547
aug_traj/return: -189.1575275975587
aug_traj/length: 2000.0
time/training: 207.8234624862671
training/train_loss_mean: -2.831107


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 562
aug_traj/return: -200.26948256478084
aug_traj/length: 2000.0
time/training: 207.8107659816742
training/train_loss_mean: -2.8365288231968293
training/train_loss_std: 0.09352848158708264
training/nll: -2.6987144947052
training/entropy: -2.460758924484253
training/temp_value: 0.03073483264639888
time/total: 128017.33660340309

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 563
aug_traj/return: -202.96852145511713
aug_traj/length: 2000.0
time/training: 207.75489592552185
training/train_loss_mean: -2.845075854818008
training/train_loss_std: 0.09469221703890025
training/nll: -2.843715190887451
training/entropy: -2.573054552078247
training/temp_value: 0.029964556111146783
time/total: 128237.55782341957

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 564
aug_traj/return: -194.9038611783381
aug_traj/length: 2000.0
time/training: 207.69202637672424
training/train_loss_mean: -2.83928713775


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 579
aug_traj/return: -206.06467130479132
aug_traj/length: 2000.0
time/training: 207.61702179908752
training/train_loss_mean: -2.88500789281035
training/train_loss_std: 0.08647602258593488
training/nll: -2.969768762588501
training/entropy: -2.760671615600586
training/temp_value: 0.020053372482090496
time/total: 131770.09745502472

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 580
aug_traj/return: -197.90523464837617
aug_traj/length: 2000.0
time/training: 207.60342860221863
training/train_loss_mean: -2.889368165943235
training/train_loss_std: 0.0893713007016445
training/nll: -2.902622938156128
training/entropy: -2.681145191192627
training/temp_value: 0.01958369162112808
evaluation/return_mean_gm: -175.8168266984283
evaluation/return_std_gm: 3.78673969898819
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.78112006187439
time/total: 132008.55897140503

Model saved at .


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 596
aug_traj/return: -199.5559882267688
aug_traj/length: 2000.0
time/training: 207.6977207660675
training/train_loss_mean: -3.026686410891806
training/train_loss_std: 0.08900677507298245
training/nll: -3.043347120285034
training/entropy: -2.81620192527771
training/temp_value: 0.01386320423262487
time/total: 135540.88315677643

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 597
aug_traj/return: -204.7162090625572
aug_traj/length: 2000.0
time/training: 207.48110961914062
training/train_loss_mean: -3.0355139761151033
training/train_loss_std: 0.08622060435416742
training/nll: -3.01332688331604
training/entropy: -2.815035343170166
training/temp_value: 0.013627674806315195
time/total: 135760.37854456902

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 598
aug_traj/return: -205.41961186018366
aug_traj/length: 2000.0
time/training: 207.67520928382874
training/train_loss_mean: -3.049582267348


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 613
aug_traj/return: -203.76427015484418
aug_traj/length: 2000.0
time/training: 207.75701022148132
training/train_loss_mean: -3.322224658614907
training/train_loss_std: 0.08309269156304942
training/nll: -3.5272631645202637
training/entropy: -3.2878639698028564
training/temp_value: 0.015257554222147843
time/total: 139310.9908027649

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 614
aug_traj/return: -183.7571197751609
aug_traj/length: 2000.0
time/training: 207.98120617866516
training/train_loss_mean: -3.336989356364325
training/train_loss_std: 0.07722812336776115
training/nll: -3.5132827758789062
training/entropy: -3.3401849269866943
training/temp_value: 0.01577804336558529
time/total: 139530.79558897018

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 615
aug_traj/return: -205.35193271541212
aug_traj/length: 2000.0
time/training: 207.70173263549805
training/train_loss_mean: -3.359262


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 630
aug_traj/return: -191.56615177298727
aug_traj/length: 2000.0
time/training: 207.93061423301697
training/train_loss_mean: -3.543754882296222
training/train_loss_std: 0.06741847964604061
training/nll: -3.6571099758148193
training/entropy: -3.304190158843994
training/temp_value: 0.02738035864192165
evaluation/return_mean_gm: -208.0461737471068
evaluation/return_std_gm: 4.454508459095845
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.736783981323242
time/total: 143084.66202616692

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 631
aug_traj/return: -198.09047255920973
aug_traj/length: 2000.0
time/training: 207.9469997882843
training/train_loss_mean: -3.550347933791874
training/train_loss_std: 0.07105976006652438
training/nll: -3.647118091583252
training/entropy: -3.4106462001800537
training/temp_value: 0.028335064096225233
time/total: 143304.06421899796

Model saved


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 647
aug_traj/return: -190.86783726670438
aug_traj/length: 2000.0
time/training: 208.21199941635132
training/train_loss_mean: -3.6321417406620444
training/train_loss_std: 0.057293397665794825
training/nll: -3.789815902709961
training/entropy: -3.4862382411956787
training/temp_value: 0.04829453432319141
time/total: 146840.0158188343

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 648
aug_traj/return: -189.9903046705111
aug_traj/length: 2000.0
time/training: 207.9676070213318
training/train_loss_mean: -3.631991642487198
training/train_loss_std: 0.05812380362810608
training/nll: -3.7391178607940674
training/entropy: -3.397536039352417
training/temp_value: 0.04988264009031648
time/total: 147059.7445347309

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 649
aug_traj/return: -189.17244221488158
aug_traj/length: 2000.0
time/training: 207.87879538536072
training/train_loss_mean: -3.629794861


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 664
aug_traj/return: -201.28929252631437
aug_traj/length: 2000.0
time/training: 208.01381492614746
training/train_loss_mean: -3.5893822841228573
training/train_loss_std: 0.050439401428351356
training/nll: -4.018435478210449
training/entropy: -3.4722695350646973
training/temp_value: 0.08272773339083778
time/total: 150613.11238336563

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 665
aug_traj/return: -198.36863978515413
aug_traj/length: 2000.0
time/training: 207.9922502040863
training/train_loss_mean: -3.5782198546629993
training/train_loss_std: 0.050557829309377696
training/nll: -3.8723552227020264
training/entropy: -3.2759079933166504
training/temp_value: 0.08528037113849879
time/total: 150832.650806427

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 666
aug_traj/return: -206.5081161942929
aug_traj/length: 2000.0
time/training: 207.97005128860474
training/train_loss_mean: -3.566728


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 681
aug_traj/return: -205.34182689585364
aug_traj/length: 2000.0
time/training: 208.11428689956665
training/train_loss_mean: -3.426864269024338
training/train_loss_std: 0.0507018335180219
training/nll: -3.9010510444641113
training/entropy: -3.1748969554901123
training/temp_value: 0.13482852559017888
time/total: 154385.099919796

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 682
aug_traj/return: -201.6736787190856
aug_traj/length: 2000.0
time/training: 208.02439379692078
training/train_loss_mean: -3.417517390778561
training/train_loss_std: 0.049285732469864695
training/nll: -3.810983657836914
training/entropy: -3.1395037174224854
training/temp_value: 0.13836674402342333
time/total: 154604.95972251892

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 683
aug_traj/return: -193.85123987462507
aug_traj/length: 2000.0
time/training: 208.24252319335938
training/train_loss_mean: -3.410081435


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 698
aug_traj/return: -192.8908007379618
aug_traj/length: 2000.0
time/training: 208.11835026741028
training/train_loss_mean: -3.2464085781869656
training/train_loss_std: 0.04318024510190702
training/nll: -3.8227691650390625
training/entropy: -2.9666240215301514
training/temp_value: 0.1868279155104792
time/total: 158140.85442566872

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 699
aug_traj/return: -188.41012141157128
aug_traj/length: 2000.0
time/training: 207.9851679801941
training/train_loss_mean: -3.2472650601361246
training/train_loss_std: 0.046888815946174466
training/nll: -3.779022455215454
training/entropy: -2.9099667072296143
training/temp_value: 0.18783052218048418
time/total: 158360.58036661148

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 700
aug_traj/return: -182.56649111533324
aug_traj/length: 2000.0
time/training: 207.97138142585754
training/train_loss_mean: -3.237890


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 715
aug_traj/return: -197.03781029430482
aug_traj/length: 2000.0
time/training: 208.04271817207336
training/train_loss_mean: -3.2004192780841847
training/train_loss_std: 0.05063931955948442
training/nll: -3.688908100128174
training/entropy: -3.043243169784546
training/temp_value: 0.18012477808176616
time/total: 161914.1237130165

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 716
aug_traj/return: -202.39346260522308
aug_traj/length: 2000.0
time/training: 208.10483121871948
training/train_loss_mean: -3.203463144746157
training/train_loss_std: 0.049465989007668215
training/nll: -3.7155916690826416
training/entropy: -2.9065823554992676
training/temp_value: 0.17952840943470957
time/total: 162134.09258294106

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 717
aug_traj/return: -185.97624778768855
aug_traj/length: 2000.0
time/training: 208.09915614128113
training/train_loss_mean: -3.205013


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 732
aug_traj/return: -189.08967173109303
aug_traj/length: 2000.0
time/training: 207.85178780555725
training/train_loss_mean: -3.1910930245141476
training/train_loss_std: 0.04889207058038168
training/nll: -3.6701807975769043
training/entropy: -2.9300858974456787
training/temp_value: 0.15561302899952076
time/total: 165689.13168358803

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 733
aug_traj/return: -174.73407982529073
aug_traj/length: 2000.0
time/training: 208.023184299469
training/train_loss_mean: -3.186238385314897
training/train_loss_std: 0.05161265528942792
training/nll: -3.6488759517669678
training/entropy: -3.056129217147827
training/temp_value: 0.1536251208857484
time/total: 165908.91025781631

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 734
aug_traj/return: -172.62496325400156
aug_traj/length: 2000.0
time/training: 208.22750306129456
training/train_loss_mean: -3.17935211


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 749
aug_traj/return: -201.69303449324812
aug_traj/length: 2000.0
time/training: 208.1640350818634
training/train_loss_mean: -3.173165145960912
training/train_loss_std: 0.050538059772216024
training/nll: -3.4820139408111572
training/entropy: -2.933959722518921
training/temp_value: 0.12339240155675198
time/total: 169446.07812404633

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 750
aug_traj/return: -193.09632472815807
aug_traj/length: 2000.0
time/training: 208.0706808567047
training/train_loss_mean: -3.169420367512417
training/train_loss_std: 0.052933848163525636
training/nll: -3.6085240840911865
training/entropy: -3.061641216278076
training/temp_value: 0.12160281614226118
evaluation/return_mean_gm: -200.51736141763416
evaluation/return_std_gm: 4.812891183650644
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.71589469909668
time/total: 169684.95847034454

Model saved


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 766
aug_traj/return: -201.95050931946776
aug_traj/length: 2000.0
time/training: 207.89399671554565
training/train_loss_mean: -3.1624033123348694
training/train_loss_std: 0.05383160674729593
training/nll: -3.4385461807250977
training/entropy: -2.953294515609741
training/temp_value: 0.0971416815941908
time/total: 173220.76929926872

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 767
aug_traj/return: -197.79906300453655
aug_traj/length: 2000.0
time/training: 208.014164686203
training/train_loss_mean: -3.159112800512553
training/train_loss_std: 0.05179557644636792
training/nll: -3.4770209789276123
training/entropy: -2.9761769771575928
training/temp_value: 0.09569887371515376
time/total: 173440.70353889465

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 768
aug_traj/return: -183.67292665544215
aug_traj/length: 2000.0
time/training: 207.84973526000977
training/train_loss_mean: -3.16249562


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 783
aug_traj/return: -206.5980863076977
aug_traj/length: 2000.0
time/training: 208.03328847885132
training/train_loss_mean: -3.1374788488120076
training/train_loss_std: 0.05235093647536019
training/nll: -3.3468027114868164
training/entropy: -2.966144561767578
training/temp_value: 0.07244802363776273
time/total: 176995.336540699

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 784
aug_traj/return: -186.21313295854443
aug_traj/length: 2000.0
time/training: 207.88163781166077
training/train_loss_mean: -3.1383981751486165
training/train_loss_std: 0.0494470527140883
training/nll: -3.3769640922546387
training/entropy: -2.975645065307617
training/temp_value: 0.07124583092342109
time/total: 177215.19918227196

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 785
aug_traj/return: -198.57254804508162
aug_traj/length: 2000.0
time/training: 208.0859899520874
training/train_loss_mean: -3.1348819043


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 800
aug_traj/return: -196.362766007447
aug_traj/length: 2000.0
time/training: 208.0278353691101
training/train_loss_mean: -3.119040534016853
training/train_loss_std: 0.05052181122029566
training/nll: -3.337618589401245
training/entropy: -2.975761651992798
training/temp_value: 0.0518289333382399
evaluation/return_mean_gm: -216.22406601893712
evaluation/return_std_gm: 1.0821728460787545
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.75276279449463
time/total: 180770.41485381126

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 801
aug_traj/return: -196.1642396023912
aug_traj/length: 2000.0
time/training: 208.15856742858887
training/train_loss_mean: -3.117405536031889
training/train_loss_std: 0.0484218138033005
training/nll: -3.332036018371582
training/entropy: -3.0464866161346436
training/temp_value: 0.050798410735303355
time/total: 180990.4129011631

Model saved at ./


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 817
aug_traj/return: -153.51819804063908
aug_traj/length: 2000.0
time/training: 208.2049069404602
training/train_loss_mean: -3.0941733091600234
training/train_loss_std: 0.04416741172972262
training/nll: -3.1584954261779785
training/entropy: -2.8870084285736084
training/temp_value: 0.03594237560965508
time/total: 184527.13025522232

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 818
aug_traj/return: -140.4110813918227
aug_traj/length: 2000.0
time/training: 207.95797300338745
training/train_loss_mean: -3.092001151341098
training/train_loss_std: 0.04224096255416733
training/nll: -3.2340190410614014
training/entropy: -2.9298367500305176
training/temp_value: 0.03511790530391853
time/total: 184746.74025917053

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 819
aug_traj/return: -144.53312602360288
aug_traj/length: 2000.0
time/training: 208.03052234649658
training/train_loss_mean: -3.092474


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 834
aug_traj/return: -158.7029930275176
aug_traj/length: 2000.0
time/training: 208.01173639297485
training/train_loss_mean: -3.0715452246669126
training/train_loss_std: 0.036940834904346444
training/nll: -3.1499130725860596
training/entropy: -2.868957281112671
training/temp_value: 0.023757035287857532
time/total: 188302.03502941132

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 835
aug_traj/return: -160.23361492381605
aug_traj/length: 2000.0
time/training: 208.0275640487671
training/train_loss_mean: -3.0637821749865646
training/train_loss_std: 0.042240182696239414
training/nll: -3.2115917205810547
training/entropy: -2.984856367111206
training/temp_value: 0.02321101867389812
time/total: 188521.6487030983

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 836
aug_traj/return: -138.9422519379319
aug_traj/length: 2000.0
time/training: 207.94628977775574
training/train_loss_mean: -3.068933


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 851
aug_traj/return: -56.83899109469603
aug_traj/length: 2000.0
time/training: 207.88400030136108
training/train_loss_mean: -3.035819614431558
training/train_loss_std: 0.034566010175135345
training/nll: -3.0598487854003906
training/entropy: -2.833578109741211
training/temp_value: 0.015276882375993712
time/total: 192076.8478589058

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 852
aug_traj/return: -58.95079876583499
aug_traj/length: 2000.0
time/training: 208.17230820655823
training/train_loss_mean: -3.034106672672846
training/train_loss_std: 0.03439269069657502
training/nll: -3.048860549926758
training/entropy: -2.845712184906006
training/temp_value: 0.01488429978019765
time/total: 192297.01239180565

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 853
aug_traj/return: -67.71557471820064
aug_traj/length: 2000.0
time/training: 207.99254250526428
training/train_loss_mean: -3.0320889365


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 868
aug_traj/return: -91.22509484601537
aug_traj/length: 2000.0
time/training: 208.04334688186646
training/train_loss_mean: -3.0093028746521298
training/train_loss_std: 0.03253107526136372
training/nll: -3.05167293548584
training/entropy: -2.869966506958008
training/temp_value: 0.0096833625043759
time/total: 195832.9837257862

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 869
aug_traj/return: -89.74926414724482
aug_traj/length: 2000.0
time/training: 208.00090050697327
training/train_loss_mean: -3.0093797056044327
training/train_loss_std: 0.03110843492500914
training/nll: -3.032028913497925
training/entropy: -2.777104139328003
training/temp_value: 0.009423253020194115
time/total: 196052.6401269436

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 870
aug_traj/return: -100.63276125162852
aug_traj/length: 2000.0
time/training: 207.89860844612122
training/train_loss_mean: -3.005075844415


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 885
aug_traj/return: -94.1048472206601
aug_traj/length: 2000.0
time/training: 208.0461449623108
training/train_loss_mean: -2.995288005322505
training/train_loss_std: 0.028309074777063643
training/nll: -3.048882007598877
training/entropy: -2.7946465015411377
training/temp_value: 0.006156314902854344
time/total: 199608.02402710915

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 886
aug_traj/return: -74.8146475885704
aug_traj/length: 2000.0
time/training: 208.01563119888306
training/train_loss_mean: -2.995110516276282
training/train_loss_std: 0.033597078262253534
training/nll: -3.0270779132843018
training/entropy: -2.89347243309021
training/temp_value: 0.005992347171955294
time/total: 199827.93254423141

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 887
aug_traj/return: -71.50323077531235
aug_traj/length: 2000.0
time/training: 208.01065516471863
training/train_loss_mean: -2.9946890354


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 902
aug_traj/return: -151.66960830382928
aug_traj/length: 2000.0
time/training: 208.28343653678894
training/train_loss_mean: -2.9751533889669655
training/train_loss_std: 0.03329136294894623
training/nll: -2.9889609813690186
training/entropy: -2.7992451190948486
training/temp_value: 0.003902231227844118
time/total: 203382.66550803185

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 903
aug_traj/return: -159.9220556378528
aug_traj/length: 2000.0
time/training: 208.07300353050232
training/train_loss_mean: -2.9760526753193064
training/train_loss_std: 0.03247883874998038
training/nll: -3.019090414047241
training/entropy: -2.8177812099456787
training/temp_value: 0.0037971420628095456
time/total: 203602.72623300552

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 904
aug_traj/return: -142.25901827761967
aug_traj/length: 2000.0
time/training: 208.01199007034302
training/train_loss_mean: -2.97


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 919
aug_traj/return: -64.35866248039652
aug_traj/length: 2000.0
time/training: 207.98174166679382
training/train_loss_mean: -2.968395042893445
training/train_loss_std: 0.032563673499451676
training/nll: -2.956144094467163
training/entropy: -2.7189061641693115
training/temp_value: 0.002478553905557573
time/total: 207137.3782300949

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 920
aug_traj/return: -96.30980173070556
aug_traj/length: 2000.0
time/training: 208.00146651268005
training/train_loss_mean: -2.980063392090912
training/train_loss_std: 0.029343958841463975
training/nll: -2.997809648513794
training/entropy: -2.8159873485565186
training/temp_value: 0.0024155238428081715
evaluation/return_mean_gm: -226.48371346401944
evaluation/return_std_gm: 2.1217897072653793
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.754774570465088
time/total: 207375.77048635483

Model s


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 936
aug_traj/return: -157.35898978293568
aug_traj/length: 2000.0
time/training: 207.84445238113403
training/train_loss_mean: -2.9980406599910525
training/train_loss_std: 0.03118178892631273
training/nll: -3.0038766860961914
training/entropy: -2.8330249786376953
training/temp_value: 0.0015907683211668184
time/total: 210909.73233652115

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 937
aug_traj/return: -170.7478944916163
aug_traj/length: 2000.0
time/training: 207.88896989822388
training/train_loss_mean: -2.991449808825457
training/train_loss_std: 0.031103008713298408
training/nll: -2.947446346282959
training/entropy: -2.7863852977752686
training/temp_value: 0.0015500307945638496
time/total: 211129.41752314568

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 938
aug_traj/return: -127.29573654418405
aug_traj/length: 2000.0
time/training: 207.9755663871765
training/train_loss_mean: -2.99


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 953
aug_traj/return: -167.87052359365015
aug_traj/length: 2000.0
time/training: 207.89535856246948
training/train_loss_mean: -2.9951555508065186
training/train_loss_std: 0.030880218553307366
training/nll: -2.973623037338257
training/entropy: -2.765913724899292
training/temp_value: 0.001015217126963186
time/total: 214682.22821998596

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 954
aug_traj/return: -158.02680999117192
aug_traj/length: 2000.0
time/training: 207.90205788612366
training/train_loss_mean: -2.993384377106842
training/train_loss_std: 0.03380857836247
training/nll: -2.9329261779785156
training/entropy: -2.8013951778411865
training/temp_value: 0.0009886972080021118
time/total: 214902.1005232334

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 955
aug_traj/return: -146.28122627750932
aug_traj/length: 2000.0
time/training: 207.92418909072876
training/train_loss_mean: -2.988135


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 970
aug_traj/return: -110.13870868172884
aug_traj/length: 2000.0
time/training: 208.00513076782227
training/train_loss_mean: -2.978529637892274
training/train_loss_std: 0.03289240513520638
training/nll: -3.041107416152954
training/entropy: -2.8696563243865967
training/temp_value: 0.0006446212925027537
evaluation/return_mean_gm: -225.29935141105608
evaluation/return_std_gm: 3.167869731423139
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.76820421218872
time/total: 218453.4917447567

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 971
aug_traj/return: -117.3629570310529
aug_traj/length: 2000.0
time/training: 207.91531014442444
training/train_loss_mean: -2.9781003454962685
training/train_loss_std: 0.03433202690996027
training/nll: -2.9672861099243164
training/entropy: -2.7869622707366943
training/temp_value: 0.0006276636575096023
time/total: 218673.1722998619

Model sa


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 987
aug_traj/return: -130.80188414930214
aug_traj/length: 2000.0
time/training: 207.9136130809784
training/train_loss_mean: -2.963695337189181
training/train_loss_std: 0.03332307234208082
training/nll: -2.9450647830963135
training/entropy: -2.8231093883514404
training/temp_value: 0.000408505453293209
time/total: 222206.97222614288

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 988
aug_traj/return: -67.50905606907335
aug_traj/length: 2000.0
time/training: 207.94115495681763
training/train_loss_mean: -2.9613952657723717
training/train_loss_std: 0.0331305676075783
training/nll: -3.006730794906616
training/entropy: -2.7858493328094482
training/temp_value: 0.0003976674745979242
time/total: 222426.87906837463

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 989
aug_traj/return: -83.02651196744972
aug_traj/length: 2000.0
time/training: 207.99955868721008
training/train_loss_mean: -2.958272


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1004
aug_traj/return: -129.0567568658384
aug_traj/length: 2000.0
time/training: 207.98208355903625
training/train_loss_mean: -2.9229199184810675
training/train_loss_std: 0.03395117925397979
training/nll: -2.937074661254883
training/entropy: -2.7725515365600586
training/temp_value: 0.0002567300980460286
time/total: 225980.71074652672

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1005
aug_traj/return: -132.22465847800223
aug_traj/length: 2000.0
time/training: 208.1396553516388
training/train_loss_mean: -2.9201442621000484
training/train_loss_std: 0.029589570956220983
training/nll: -2.9412930011749268
training/entropy: -2.727672815322876
training/temp_value: 0.00024987090420101486
time/total: 226201.19040226936

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1006
aug_traj/return: -112.0429771356482
aug_traj/length: 2000.0
time/training: 207.958838224411
training/train_loss_mean: -2.9


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1021
aug_traj/return: -121.39945618727111
aug_traj/length: 2000.0
time/training: 208.0349097251892
training/train_loss_mean: -2.887327377161698
training/train_loss_std: 0.029494974034068
training/nll: -2.92427659034729
training/entropy: -2.7686283588409424
training/temp_value: 0.0001610028210395207
time/total: 229756.2340312004

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1022
aug_traj/return: -116.8494710204257
aug_traj/length: 2000.0
time/training: 207.97748637199402
training/train_loss_mean: -2.8846402924969676
training/train_loss_std: 0.031650220456847226
training/nll: -2.883615255355835
training/entropy: -2.748798131942749
training/temp_value: 0.00015657701985999492
time/total: 229975.9211549759

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1023
aug_traj/return: -135.1455310545387
aug_traj/length: 2000.0
time/training: 207.97984671592712
training/train_loss_mean: -2.881912


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1038
aug_traj/return: -139.43483773582236
aug_traj/length: 2000.0
time/training: 207.9526116847992
training/train_loss_mean: -2.8603529403694314
training/train_loss_std: 0.024770569103374777
training/nll: -2.8356475830078125
training/entropy: -2.7285544872283936
training/temp_value: 0.00010115513502383517
time/total: 233514.44547748566

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1039
aug_traj/return: -133.49464541860175
aug_traj/length: 2000.0
time/training: 207.85883951187134
training/train_loss_mean: -2.863533809094249
training/train_loss_std: 0.02812748601461937
training/nll: -2.849982976913452
training/entropy: -2.720916748046875
training/temp_value: 9.847087251608832e-05
time/total: 233733.9918718338

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1040
aug_traj/return: -141.0884343035495
aug_traj/length: 2000.0
time/training: 208.1015646457672
training/train_loss_mean: -2.8


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1055
aug_traj/return: -158.0884752128085
aug_traj/length: 2000.0
time/training: 208.02066850662231
training/train_loss_mean: -2.876947526457269
training/train_loss_std: 0.02423684235124599
training/nll: -2.8790123462677
training/entropy: -2.6923344135284424
training/temp_value: 6.416704855981352e-05
time/total: 237289.694781065

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1056
aug_traj/return: -155.82118750755345
aug_traj/length: 2000.0
time/training: 208.18363237380981
training/train_loss_mean: -2.87568554948638
training/train_loss_std: 0.024703776105753465
training/nll: -2.8988659381866455
training/entropy: -2.7498645782470703
training/temp_value: 6.248461827760253e-05
time/total: 237509.79053092003

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1057
aug_traj/return: -203.61922926364642
aug_traj/length: 2000.0
time/training: 207.90100955963135
training/train_loss_mean: -2.8807


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1072
aug_traj/return: -196.36494409079842
aug_traj/length: 2000.0
time/training: 208.08246064186096
training/train_loss_mean: -2.9145897578794218
training/train_loss_std: 0.027209500694689893
training/nll: -2.9188413619995117
training/entropy: -2.740438938140869
training/temp_value: 4.114131443324548e-05
time/total: 241066.16564369202

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1073
aug_traj/return: -160.78261833032798
aug_traj/length: 2000.0
time/training: 207.99231457710266
training/train_loss_mean: -2.9147200431877467
training/train_loss_std: 0.027696199744849777
training/nll: -2.8808321952819824
training/entropy: -2.7413835525512695
training/temp_value: 4.007447617322743e-05
time/total: 241285.905002594

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1074
aug_traj/return: -171.85247865813068
aug_traj/length: 2000.0
time/training: 208.10875582695007
training/train_loss_mean: 


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1089
aug_traj/return: -177.27592126052906
aug_traj/length: 2000.0
time/training: 208.14105200767517
training/train_loss_mean: -2.9477600561711923
training/train_loss_std: 0.03013986811698576
training/nll: -3.0348405838012695
training/entropy: -2.8583507537841797
training/temp_value: 2.650527406894323e-05
time/total: 244824.25607466698

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1090
aug_traj/return: -195.70404576255913
aug_traj/length: 2000.0
time/training: 208.0993230342865
training/train_loss_mean: -2.9530838489135345
training/train_loss_std: 0.031501307275062965
training/nll: -2.929586410522461
training/entropy: -2.7425620555877686
training/temp_value: 2.5837478562103367e-05
evaluation/return_mean_gm: -220.61016310524968
evaluation/return_std_gm: 16.058042548648533
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.726242065429688
time/total: 245062.89917254448



Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1106
aug_traj/return: -181.81345914197468
aug_traj/length: 2000.0
time/training: 208.24223971366882
training/train_loss_mean: -2.9710161918882463
training/train_loss_std: 0.031884045641173274
training/nll: -3.005784273147583
training/entropy: -2.8758513927459717
training/temp_value: 1.703546851815964e-05
time/total: 248600.10086536407

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1107
aug_traj/return: -160.41492303542483
aug_traj/length: 2000.0
time/training: 208.41681790351868
training/train_loss_mean: -2.969382939745002
training/train_loss_std: 0.02867654249329623
training/nll: -2.992084503173828
training/entropy: -2.7804813385009766
training/temp_value: 1.6596385050029305e-05
time/total: 248820.47964262962

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1108
aug_traj/return: -150.79111052380296
aug_traj/length: 2000.0
time/training: 208.15446376800537
training/train_loss_mean: 


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1123
aug_traj/return: -188.7301176429739
aug_traj/length: 2000.0
time/training: 208.20896530151367
training/train_loss_mean: -2.993551175242052
training/train_loss_std: 0.03545057178361147
training/nll: -3.0292723178863525
training/entropy: -2.8409104347229004
training/temp_value: 1.0990832922363025e-05
time/total: 252378.31267261505

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1124
aug_traj/return: -156.96534297171422
aug_traj/length: 2000.0
time/training: 208.38962173461914
training/train_loss_mean: -2.99201046762646
training/train_loss_std: 0.03300430194943734
training/nll: -2.981421709060669
training/entropy: -2.8783137798309326
training/temp_value: 1.0712317214279099e-05
time/total: 252598.43803811073

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1125
aug_traj/return: -180.5334705351786
aug_traj/length: 2000.0
time/training: 208.0763235092163
training/train_loss_mean: -2.9


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1140
aug_traj/return: -124.04811148648477
aug_traj/length: 2000.0
time/training: 208.06610441207886
training/train_loss_mean: -3.0202744457200317
training/train_loss_std: 0.0351114674447118
training/nll: -3.025813579559326
training/entropy: -2.8955297470092773
training/temp_value: 7.174560868616432e-06
evaluation/return_mean_gm: -215.96373457024993
evaluation/return_std_gm: 8.912963470284245
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.76580238342285
time/total: 256154.14962530136

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1141
aug_traj/return: -105.21126068564834
aug_traj/length: 2000.0
time/training: 208.21644926071167
training/train_loss_mean: -3.0168498732650266
training/train_loss_std: 0.034255426599305874
training/nll: -3.0352702140808105
training/entropy: -2.9347617626190186
training/temp_value: 7.0032981520125916e-06
time/total: 256374.28787708282

M


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1157
aug_traj/return: -99.37178809954845
aug_traj/length: 2000.0
time/training: 208.32607555389404
training/train_loss_mean: -2.999181125488617
training/train_loss_std: 0.707149684463889
training/nll: -3.0522496700286865
training/entropy: -2.906013250350952
training/temp_value: 4.966191831844349e-06
time/total: 259910.74970650673

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1158
aug_traj/return: -105.91011803870224
aug_traj/length: 2000.0
time/training: 208.2974648475647
training/train_loss_mean: -2.999170926079083
training/train_loss_std: 0.5075906097061746
training/nll: -3.052626371383667
training/entropy: -2.945244789123535
training/temp_value: 4.834948337491109e-06
time/total: 260130.5891520977

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1159
aug_traj/return: -107.23870103469896
aug_traj/length: 2000.0
time/training: 208.1667718887329
training/train_loss_mean: -3.03541068


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1174
aug_traj/return: -104.0921809713698
aug_traj/length: 2000.0
time/training: 208.17493057250977
training/train_loss_mean: -2.9804150641899385
training/train_loss_std: 0.06521430190993119
training/nll: -2.8901448249816895
training/entropy: -2.7724812030792236
training/temp_value: 3.092701809816375e-06
time/total: 263687.75011467934

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1175
aug_traj/return: -100.78406865163109
aug_traj/length: 2000.0
time/training: 208.36156296730042
training/train_loss_mean: -2.9733667524355205
training/train_loss_std: 0.06311657141174296
training/nll: -2.941114664077759
training/entropy: -2.8768534660339355
training/temp_value: 3.0077418134765776e-06
time/total: 263907.8383922577

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1176
aug_traj/return: -108.66513315171623
aug_traj/length: 2000.0
time/training: 208.26036190986633
training/train_loss_mean: -


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1191
aug_traj/return: -109.01728652804952
aug_traj/length: 2000.0
time/training: 208.00784873962402
training/train_loss_mean: -2.8771910744077345
training/train_loss_std: 0.10020777611012506
training/nll: -2.8997223377227783
training/entropy: -2.7128407955169678
training/temp_value: 1.9191540373245854e-06
time/total: 267463.85343050957

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1192
aug_traj/return: -111.85079569324789
aug_traj/length: 2000.0
time/training: 208.13616228103638
training/train_loss_mean: -2.8636496453993496
training/train_loss_std: 0.10280249692360006
training/nll: -2.831268787384033
training/entropy: -2.7467169761657715
training/temp_value: 1.8645172807911818e-06
time/total: 267683.92031764984

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1193
aug_traj/return: -109.64266543174071
aug_traj/length: 2000.0
time/training: 208.10191774368286
training/train_loss_mean


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1208
aug_traj/return: -106.01930368091921
aug_traj/length: 2000.0
time/training: 208.1202211380005
training/train_loss_mean: -2.7795272665594477
training/train_loss_std: 0.08002824323208299
training/nll: -2.7062277793884277
training/entropy: -2.565690755844116
training/temp_value: 1.1940301050807787e-06
time/total: 271220.1684384346

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1209
aug_traj/return: -108.73849900904588
aug_traj/length: 2000.0
time/training: 208.10385012626648
training/train_loss_mean: -2.779233171144048
training/train_loss_std: 0.08217599608689598
training/nll: -2.8311026096343994
training/entropy: -2.7785770893096924
training/temp_value: 1.161699378526884e-06
time/total: 271440.0749361515

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1210
aug_traj/return: -109.42185006841433
aug_traj/length: 2000.0
time/training: 208.31902599334717
training/train_loss_mean: -2.


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1225
aug_traj/return: -109.89151764348146
aug_traj/length: 2000.0
time/training: 208.03245210647583
training/train_loss_mean: -2.6964445707814164
training/train_loss_std: 0.09300098172514153
training/nll: -2.6601295471191406
training/entropy: -2.4503467082977295
training/temp_value: 7.492876822084063e-07
time/total: 274995.9081144333

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1226
aug_traj/return: -107.78210752306374
aug_traj/length: 2000.0
time/training: 208.0370454788208
training/train_loss_mean: -2.6892166957880517
training/train_loss_std: 0.08558584144302349
training/nll: -2.6467132568359375
training/entropy: -2.604644536972046
training/temp_value: 7.290899276185093e-07
time/total: 275215.6906645298

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1227
aug_traj/return: -106.97406159487397
aug_traj/length: 2000.0
time/training: 208.00610899925232
training/train_loss_mean: -2.


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1242
aug_traj/return: -105.89798179712686
aug_traj/length: 2000.0
time/training: 208.05472874641418
training/train_loss_mean: -2.5969418037601555
training/train_loss_std: 0.12468284916055303
training/nll: -2.607898473739624
training/entropy: -2.40034556388855
training/temp_value: 4.7263507102951554e-07
time/total: 278770.0688099861

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1243
aug_traj/return: -107.31569314092224
aug_traj/length: 2000.0
time/training: 207.97180461883545
training/train_loss_mean: -2.579338374099945
training/train_loss_std: 0.1557530231135578
training/nll: -2.557481050491333
training/entropy: -2.541978597640991
training/temp_value: 4.598448020473244e-07
time/total: 278989.59900450706

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1244
aug_traj/return: -101.32905747570965
aug_traj/length: 2000.0
time/training: 208.25280284881592
training/train_loss_mean: -2.591


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1259
aug_traj/return: -111.9150036911341
aug_traj/length: 2000.0
time/training: 208.08463859558105
training/train_loss_mean: -2.4912160279468436
training/train_loss_std: 0.07199423928299555
training/nll: -2.627291202545166
training/entropy: -2.5117433071136475
training/temp_value: 2.989338116830023e-07
time/total: 282524.8569571972

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1260
aug_traj/return: -108.68796268022518
aug_traj/length: 2000.0
time/training: 208.10056900978088
training/train_loss_mean: -2.479832783187288
training/train_loss_std: 0.13800238406459558
training/nll: -2.448148012161255
training/entropy: -2.3860161304473877
training/temp_value: 2.911071103400799e-07
evaluation/return_mean_gm: -232.16591564815607
evaluation/return_std_gm: 0.12882442596386798
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.75312876701355
time/total: 282763.5214021206

Model


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1276
aug_traj/return: -112.1682693141192
aug_traj/length: 2000.0
time/training: 207.94384789466858
training/train_loss_mean: -2.3954278100028663
training/train_loss_std: 0.0752881224870783
training/nll: -2.447438955307007
training/entropy: -2.3047454357147217
training/temp_value: 1.9061885421556927e-07
time/total: 286297.7723789215

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1277
aug_traj/return: -106.22256075658458
aug_traj/length: 2000.0
time/training: 207.99528694152832
training/train_loss_mean: -2.3837543359255378
training/train_loss_std: 0.07099103999422027
training/nll: -2.435378313064575
training/entropy: -2.30446195602417
training/temp_value: 1.8570007541250918e-07
time/total: 286517.4775440693

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1278
aug_traj/return: -110.99575281940824
aug_traj/length: 2000.0
time/training: 208.38311791419983
training/train_loss_mean: -2.37


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1293
aug_traj/return: -114.49822514409966
aug_traj/length: 2000.0
time/training: 208.2026629447937
training/train_loss_mean: -2.3052864090306264
training/train_loss_std: 0.06982422466833749
training/nll: -2.2800774574279785
training/entropy: -2.1446540355682373
training/temp_value: 1.2302346221076333e-07
time/total: 290072.1143593788

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1294
aug_traj/return: -110.14424870750665
aug_traj/length: 2000.0
time/training: 207.98984289169312
training/train_loss_mean: -2.309048595834433
training/train_loss_std: 0.07231068026694927
training/nll: -2.2656428813934326
training/entropy: -2.1309545040130615
training/temp_value: 1.1993997231488352e-07
time/total: 290291.74709892273

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1295
aug_traj/return: -114.06011618114869
aug_traj/length: 2000.0
time/training: 208.00495290756226
training/train_loss_mean: 


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1310
aug_traj/return: -108.5185187358929
aug_traj/length: 2000.0
time/training: 207.96593928337097
training/train_loss_mean: -2.228440863958413
training/train_loss_std: 0.07503950408559208
training/nll: -2.1217663288116455
training/entropy: -2.061929702758789
training/temp_value: 8.058372589788916e-08
evaluation/return_mean_gm: -228.1863807078483
evaluation/return_std_gm: 0.16581312639228493
evaluation/length_mean_gm: 2000.0
evaluation/length_std_gm: 0.0
time/evaluation: 18.699021577835083
time/total: 293845.7101416588

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1311
aug_traj/return: -109.46360521848555
aug_traj/length: 2000.0
time/training: 207.97476601600647
training/train_loss_mean: -2.229451680251543
training/train_loss_std: 0.08183061947760985
training/nll: -2.1426286697387695
training/entropy: -2.0670325756073
training/temp_value: 7.863532060326496e-08
time/total: 294065.4884135723

Model sa


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1327
aug_traj/return: -107.50681649964699
aug_traj/length: 2000.0
time/training: 207.8190996646881
training/train_loss_mean: -2.136515930351696
training/train_loss_std: 0.07838343715124563
training/nll: -2.1772563457489014
training/entropy: -2.1398205757141113
training/temp_value: 5.345311754290513e-08
time/total: 297599.1785335541

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1328
aug_traj/return: -116.45452844511722
aug_traj/length: 2000.0
time/training: 208.00049829483032
training/train_loss_mean: -2.126827196613516
training/train_loss_std: 0.07152038232849266
training/nll: -2.094046115875244
training/entropy: -2.0627200603485107
training/temp_value: 5.2212826279089875e-08
time/total: 297818.9692595005

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1329
aug_traj/return: -109.30311120326371
aug_traj/length: 2000.0
time/training: 207.76435375213623
training/train_loss_mean: -2.1


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1344
aug_traj/return: -116.03569572688953
aug_traj/length: 2000.0
time/training: 207.9163167476654
training/train_loss_mean: -2.0160931325589964
training/train_loss_std: 0.06996026200959728
training/nll: -2.033996343612671
training/entropy: -2.091856002807617
training/temp_value: 3.608611481528468e-08
time/total: 301371.10664749146

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1345
aug_traj/return: -127.68955767626123
aug_traj/length: 2000.0
time/training: 208.06701731681824
training/train_loss_mean: -2.012752443927131
training/train_loss_std: 0.06553795500084918
training/nll: -1.9421173334121704
training/entropy: -1.929867148399353
training/temp_value: 3.5286682288072505e-08
time/total: 301590.7319293022

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1346
aug_traj/return: -124.83898711757945
aug_traj/length: 2000.0
time/training: 208.03516674041748
training/train_loss_mean: -1.9


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1361
aug_traj/return: -127.40891986760207
aug_traj/length: 2000.0
time/training: 207.76932787895203
training/train_loss_mean: -1.9608457411355396
training/train_loss_std: 0.042701539976115505
training/nll: -1.9765504598617554
training/entropy: -1.8794234991073608
training/temp_value: 2.4887088610932143e-08
time/total: 305143.2843179703

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1362
aug_traj/return: -124.78921320708719
aug_traj/length: 2000.0
time/training: 207.84685277938843
training/train_loss_mean: -1.9670896893982608
training/train_loss_std: 0.04394174563070603
training/nll: -1.9229508638381958
training/entropy: -1.8788608312606812
training/temp_value: 2.4383511102195387e-08
time/total: 305363.1781806946

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1363
aug_traj/return: -125.82429986389232
aug_traj/length: 2000.0
time/training: 207.88156604766846
training/train_loss_mean


Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1378
aug_traj/return: -126.43226832757273
aug_traj/length: 2000.0
time/training: 207.3406946659088
training/train_loss_mean: -2.073367413473427
training/train_loss_std: 0.04251750228833954
training/nll: -2.090355634689331
training/entropy: -2.0090487003326416
training/temp_value: 1.7905869410984588e-08
time/total: 308889.2657184601

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1379
aug_traj/return: -124.43435258361599
aug_traj/length: 2000.0
time/training: 207.41534185409546
training/train_loss_mean: -2.079204531968513
training/train_loss_std: 0.04439043237301598
training/nll: -2.1202642917633057
training/entropy: -2.027517080307007
training/temp_value: 1.7581665052063195e-08
time/total: 309108.47677469254

Model saved at ./exp/2023.04.20/200320-default/model.pt
Iteration 1380
aug_traj/return: -127.57948457669298
aug_traj/length: 2000.0
time/training: 207.49181199073792
training/train_loss_mean: -2.