In [1]:
#!cat ~/.bashrc

In [2]:
#!mv ../drone_dataset.pkl .

In [3]:
#!pip3 install --upgrade protobuf==3.20.0 

In [4]:
#!pip3 install transformers==4.5.1
#!pip3 install -U tokenizers
# The command below (uninstalling tokenizers) solves many problems.
#!pip3 uninstall tokenizers -y

In [5]:
from torch.utils.tensorboard import SummaryWriter
import argparse
import pickle
import random
import time
import gym
import d4rl
import torch
import numpy as np

import utils
from replay_buffer import ReplayBuffer
from lamb import Lamb
from stable_baselines3.common.vec_env import SubprocVecEnv
from pathlib import Path
from data import create_dataloader
from decision_transformer.models.decision_transformer import DecisionTransformer
from evaluation import create_vec_eval_episodes_fn, vec_evaluate_episode_rtg
from trainer import SequenceTrainer
from logger import Logger

from env import make_pytorch_env

# Maximum episode length used by this notebook: it is passed as `max_ep_len`
# both to the DecisionTransformer constructor and to the online rollout call
# (vec_evaluate_episode_rtg) inside Experiment.
# NOTE(review): data.py reportedly defines a similar constant; presumably the two
# values must agree -- confirm before changing either one.
MAX_EPISODE_LEN = 4000 # Warning: there is a similar variable in data.py!

pybullet build time: May 20 2022 19:44:17


In [6]:
import sys

# Inside a notebook kernel sys.argv holds the kernel's own launch arguments, which
# argparse would reject. Reset it so parse_args() below only sees the defaults.
sys.argv = ['']


def _str_to_bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy. This converter accepts the usual
    spellings instead.

    Args:
        value: the raw token (or an actual bool, which is returned unchanged).

    Returns:
        bool: the parsed value. The empty string maps to ``False``, matching what
        ``bool("")`` used to return.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--seed", type=int, default=10)
# Name of the dataset/environment; alternative tried earlier: "antmaze-large-diverse-v2".
parser.add_argument("--env", type=str, default="drone_dataset")

# model options
# Context length K; values tried earlier: 20, 200.
parser.add_argument("--K", type=int, default=80)
parser.add_argument("--embed_dim", type=int, default=512)
parser.add_argument("--n_layer", type=int, default=4)
parser.add_argument("--n_head", type=int, default=4)
parser.add_argument("--activation_function", type=str, default="relu")
parser.add_argument("--dropout", type=float, default=0.1)
# Evaluation context length; values tried earlier: 5, 50.
parser.add_argument("--eval_context_length", type=int, default=20)
# 0: no pos embedding others: absolute ordering
parser.add_argument("--ordering", type=int, default=0)

# shared evaluation options
parser.add_argument("--eval_rtg", type=int, default=3600)
parser.add_argument("--num_eval_episodes", type=int, default=10)

# shared training options
parser.add_argument("--init_temperature", type=float, default=0.1)
# Batch size; value tried earlier: 256.
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--learning_rate", "-lr", type=float, default=1e-4)
parser.add_argument("--weight_decay", "-wd", type=float, default=5e-4)
parser.add_argument("--warmup_steps", type=int, default=10000)

# pretraining options
parser.add_argument("--max_pretrain_iters", type=int, default=1)
parser.add_argument("--num_updates_per_pretrain_iter", type=int, default=5000)

# finetuning options
parser.add_argument("--max_online_iters", type=int, default=1500)
parser.add_argument("--online_rtg", type=int, default=7200)
parser.add_argument("--num_online_rollouts", type=int, default=1)
parser.add_argument("--replay_size", type=int, default=1000)
parser.add_argument("--num_updates_per_online_iter", type=int, default=300)
parser.add_argument("--eval_interval", type=int, default=10)

# environment options
parser.add_argument("--device", type=str, default="cuda")
# Parsed with _str_to_bool so that "--log_to_tb False" really disables TensorBoard.
parser.add_argument("--log_to_tb", "-w", type=_str_to_bool, default=True)
parser.add_argument("--save_dir", type=str, default="./exp")
parser.add_argument("--exp_name", type=str, default="default")

args = parser.parse_args()

In [7]:
class Experiment:
    """Offline pretraining followed by online finetuning of a Decision Transformer.

    The instance is configured entirely by the ``variant`` dict handed to
    ``__init__``; no notebook-global state (such as ``args``) is read. Calling the
    instance runs the whole pipeline: build evaluation envs, pretrain on the
    offline dataset, then alternate online rollouts and gradient updates.
    """

    def __init__(self, variant):
        """Build the model, optimizers, replay buffer and logger.

        Args:
            variant: dict of hyper-parameters. Keys read here: "env",
                "replay_size", "device" (optional, defaults to "cuda"), "K",
                "eval_context_length", "embed_dim", "n_layer", "n_head",
                "activation_function", "dropout", "ordering",
                "init_temperature", "learning_rate", "weight_decay" and
                "warmup_steps". The dict is also expanded into an
                ``argparse.Namespace`` for ``make_pytorch_env``.
        """
        self.state_dim, self.act_dim, self.action_range = self._get_env_spec(variant)
        self.offline_trajs, self.state_mean, self.state_std = self._load_dataset(
            variant["env"]
        )
        # initialize by offline trajs
        self.replay_buffer = ReplayBuffer(variant["replay_size"], self.offline_trajs)

        self.aug_trajs = []

        self.device = variant.get("device", "cuda")
        self.target_entropy = -self.act_dim
        self.model = DecisionTransformer(
            state_dim=self.state_dim,
            act_dim=self.act_dim,
            action_range=self.action_range,
            max_length=variant["K"],
            eval_context_length=variant["eval_context_length"],
            max_ep_len=MAX_EPISODE_LEN,
            hidden_size=variant["embed_dim"],
            n_layer=variant["n_layer"],
            n_head=variant["n_head"],
            n_inner=4 * variant["embed_dim"],
            activation_function=variant["activation_function"],
            n_positions=1024,
            resid_pdrop=variant["dropout"],
            attn_pdrop=variant["dropout"],
            stochastic_policy=True,
            ordering=variant["ordering"],
            init_temperature=variant["init_temperature"],
            target_entropy=self.target_entropy,
        ).to(device=self.device)

        self.optimizer = Lamb(
            self.model.parameters(),
            lr=variant["learning_rate"],
            weight_decay=variant["weight_decay"],
            eps=1e-8,
        )
        # Linear warm-up to the base learning rate. Clamping the divisor to 1 keeps
        # warmup_steps=0 from raising ZeroDivisionError (the factor is then 1 at once).
        warmup_steps = max(variant["warmup_steps"], 1)
        self.scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, lambda steps: min((steps + 1) / warmup_steps, 1)
        )

        self.log_temperature_optimizer = torch.optim.Adam(
            [self.model.log_temperature],
            lr=1e-4,
            betas=[0.9, 0.999],
        )

        # track the training progress and
        # training/evaluation/online performance in all the iterations
        self.pretrain_iter = 0
        self.online_iter = 0
        self.total_transitions_sampled = 0
        self.variant = variant
        # Reference point for the "time/total" metric; __call__ resets it when the
        # run actually starts. Set here so pretrain()/online_tuning() can also be
        # called directly.
        self.start_time = time.time()
        self.reward_scale = 1.0 if "antmaze" in variant["env"] else 0.001
        self.logger = Logger(variant)

    def _get_env_spec(self, variant):
        """Probe a throw-away environment for its observation/action sizes.

        Args:
            variant: hyper-parameter dict; it is expanded into an
                ``argparse.Namespace`` and passed to ``make_pytorch_env``.

        Returns:
            tuple: ``(state_dim, act_dim, action_range)`` where ``action_range`` is
            ``[low + 1e-6, high - 1e-6]`` taken from the extreme bounds of the
            action space.
        """
        # Build the env from `variant` rather than from a global `args`, so the
        # spec always matches the configuration this instance was created with.
        env = make_pytorch_env(argparse.Namespace(**variant))
        try:
            state_dim = env.observation_space.shape[0]
            act_dim = env.action_space.shape[0]
            # Shrink the range by 1e-6 on both sides (previously hard-coded as
            # [-0.999999, 0.999999]).
            action_range = [
                float(env.action_space.low.min()) + 1e-6,
                float(env.action_space.high.max()) - 1e-6,
            ]
            print("action_range: {}".format(action_range))
        finally:
            # Always release the probe environment, even if the spec lookup fails.
            env.close()
        return state_dim, act_dim, action_range

    def _save_model(self, path_prefix, is_pretrain_model=False):
        """Write a full training checkpoint to ``{path_prefix}/model.pt``.

        The checkpoint holds model/optimizer/scheduler state, iteration counters,
        the variant dict and the NumPy, Python and PyTorch RNG states.

        Args:
            path_prefix: directory that receives the checkpoint file(s).
            is_pretrain_model: when true, the same checkpoint is additionally
                written to ``{path_prefix}/pretrain_model.pt``.
        """
        to_save = {
            "model_state_dict": self.model.state_dict(),
            "optimizer_state_dict": self.optimizer.state_dict(),
            "scheduler_state_dict": self.scheduler.state_dict(),
            "pretrain_iter": self.pretrain_iter,
            "online_iter": self.online_iter,
            "args": self.variant,
            "total_transitions_sampled": self.total_transitions_sampled,
            "np": np.random.get_state(),
            "python": random.getstate(),
            "pytorch": torch.get_rng_state(),
            "log_temperature_optimizer_state_dict": self.log_temperature_optimizer.state_dict(),
        }

        with open(f"{path_prefix}/model.pt", "wb") as f:
            torch.save(to_save, f)
        print(f"\nModel saved at {path_prefix}/model.pt")

        if is_pretrain_model:
            with open(f"{path_prefix}/pretrain_model.pt", "wb") as f:
                torch.save(to_save, f)
            print(f"Model saved at {path_prefix}/pretrain_model.pt")

    def _load_model(self, path_prefix):
        """Restore training state from ``{path_prefix}/model.pt`` if it exists.

        Does nothing when the file is absent. Restores the model, both optimizers,
        the scheduler, the iteration counters and the three RNG states.

        Args:
            path_prefix: directory that contains ``model.pt``.
        """
        if Path(f"{path_prefix}/model.pt").exists():
            with open(f"{path_prefix}/model.pt", "rb") as f:
                # NOTE(review): torch.load unpickles the file; only load
                # checkpoints from a trusted source.
                checkpoint = torch.load(f)
            self.model.load_state_dict(checkpoint["model_state_dict"])
            self.optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
            self.scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
            self.log_temperature_optimizer.load_state_dict(
                checkpoint["log_temperature_optimizer_state_dict"]
            )
            self.pretrain_iter = checkpoint["pretrain_iter"]
            self.online_iter = checkpoint["online_iter"]
            self.total_transitions_sampled = checkpoint["total_transitions_sampled"]
            np.random.set_state(checkpoint["np"])
            random.setstate(checkpoint["python"])
            torch.set_rng_state(checkpoint["pytorch"])
            print(f"Model loaded at {path_prefix}/model.pt")

    def _load_dataset(self, env_name):
        """Load the offline trajectories and compute state normalisation stats.

        Args:
            env_name: dataset name; the file read is ``./data/{env_name}.pkl``
                (relative to the current working directory).

        Returns:
            tuple: ``(trajectories, state_mean, state_std)``. ``trajectories`` is
            ordered from lowest to highest return; ``state_mean``/``state_std`` are
            per-dimension statistics over every observation in the file, with
            ``1e-6`` added to the std.

        Raises:
            ValueError: if the file contains no trajectories.
        """
        dataset_path = f"./data/{env_name}.pkl"
        with open(dataset_path, "rb") as f:
            # NOTE(review): pickle.load can execute arbitrary code; the dataset
            # file must come from a trusted source.
            trajectories = pickle.load(f)

        if len(trajectories) == 0:
            raise ValueError(f"No trajectories found in {dataset_path}")

        states, traj_lens, returns = [], [], []
        for path in trajectories:
            states.append(path["observations"])
            traj_lens.append(len(path["observations"]))
            returns.append(path["rewards"].sum())
        traj_lens, returns = np.array(traj_lens), np.array(returns)

        # used for input normalization
        states = np.concatenate(states, axis=0)
        state_mean, state_std = np.mean(states, axis=0), np.std(states, axis=0) + 1e-6
        num_timesteps = sum(traj_lens)

        print("=" * 50)
        print(f"Starting new experiment: {env_name}")
        print(f"{len(traj_lens)} trajectories, {num_timesteps} timesteps found")
        print(f"Average return: {np.mean(returns):.2f}, std: {np.std(returns):.2f}")
        print(f"Max return: {np.max(returns):.2f}, min: {np.min(returns):.2f}")
        print(f"Average length: {np.mean(traj_lens):.2f}, std: {np.std(traj_lens):.2f}")
        print(f"Max length: {np.max(traj_lens):.2f}, min: {np.min(traj_lens):.2f}")
        print("=" * 50)

        # Keep the highest-return trajectories whose combined length stays below
        # the timestep budget (here: the full dataset size).
        # NOTE(review): because the comparison is strict (`<`) and the budget
        # equals the total number of timesteps, the lowest-return trajectory is
        # always left out -- confirm whether `<=` was intended.
        sorted_inds = np.argsort(returns)  # lowest to highest
        num_trajectories = 1
        timesteps = traj_lens[sorted_inds[-1]]
        ind = len(trajectories) - 2
        while ind >= 0 and timesteps + traj_lens[sorted_inds[ind]] < num_timesteps:
            timesteps += traj_lens[sorted_inds[ind]]
            num_trajectories += 1
            ind -= 1
        sorted_inds = sorted_inds[-num_trajectories:]
        trajectories = [trajectories[ii] for ii in sorted_inds]

        return trajectories, state_mean, state_std

    def _augment_trajectories(
        self,
        online_envs,
        target_explore,
        n,
        randomized=False,
    ):
        """Roll out the current policy online and store the new trajectories.

        Args:
            online_envs: vectorised environment used for the rollouts; one episode
                is collected per sub-environment.
            target_explore: unscaled return-to-go target; it is multiplied by
                ``self.reward_scale`` before being handed to the rollout helper.
            n: unused; kept for interface compatibility.
            randomized: unused; kept for interface compatibility.

        Returns:
            dict: mean return and mean length of the collected episodes under the
            keys "aug_traj/return" and "aug_traj/length".
        """
        max_ep_len = MAX_EPISODE_LEN

        with torch.no_grad():
            # generate init state
            target_return = [target_explore * self.reward_scale] * online_envs.num_envs

            returns, lengths, trajs = vec_evaluate_episode_rtg(
                online_envs,
                self.state_dim,
                self.act_dim,
                self.model,
                max_ep_len=max_ep_len,
                reward_scale=self.reward_scale,
                target_return=target_return,
                mode="normal",
                state_mean=self.state_mean,
                state_std=self.state_std,
                device=self.device,
                use_mean=False,
            )

        self.replay_buffer.add_new_trajs(trajs)
        self.aug_trajs += trajs
        self.total_transitions_sampled += np.sum(lengths)

        return {
            "aug_traj/return": np.mean(returns),
            "aug_traj/length": np.mean(lengths),
        }

    def _make_trainer(self):
        """Create a SequenceTrainer bound to this experiment's model and optimizers."""
        return SequenceTrainer(
            model=self.model,
            optimizer=self.optimizer,
            log_temperature_optimizer=self.log_temperature_optimizer,
            scheduler=self.scheduler,
            device=self.device,
        )

    def _make_eval_fns(self, eval_envs):
        """Create the list of evaluation callables run by ``evaluate``."""
        return [
            create_vec_eval_episodes_fn(
                vec_env=eval_envs,
                eval_rtg=self.variant["eval_rtg"],
                state_dim=self.state_dim,
                act_dim=self.act_dim,
                state_mean=self.state_mean,
                state_std=self.state_std,
                device=self.device,
                use_mean=True,
                reward_scale=self.reward_scale,
            )
        ]

    def _make_dataloader(self, trajectories, num_iters):
        """Create a dataloader over ``trajectories`` yielding ``num_iters`` batches' worth of updates."""
        return create_dataloader(
            trajectories=trajectories,
            num_iters=num_iters,
            batch_size=self.variant["batch_size"],
            max_len=self.variant["K"],
            state_dim=self.state_dim,
            act_dim=self.act_dim,
            state_mean=self.state_mean,
            state_std=self.state_std,
            reward_scale=self.reward_scale,
            action_range=self.action_range,
        )

    def _make_writer(self):
        """Return a TensorBoard SummaryWriter, or None when "log_to_tb" is falsy."""
        return (
            SummaryWriter(self.logger.log_path) if self.variant["log_to_tb"] else None
        )

    def pretrain(self, eval_envs, loss_fn):
        """Train on the offline dataset for "max_pretrain_iters" iterations.

        Every iteration builds a fresh dataloader, runs one training iteration,
        evaluates, logs the metrics and saves a checkpoint (also as
        ``pretrain_model.pt``).

        Args:
            eval_envs: vectorised environment used for evaluation.
            loss_fn: callable passed to ``SequenceTrainer.train_iteration``.
        """
        print("\n\n\n*** Pretrain ***")
        print("----------------")
        print("eval_envs: {}".format(eval_envs))
        print("loss_fn: {}".format(loss_fn))

        eval_fns = self._make_eval_fns(eval_envs)
        trainer = self._make_trainer()
        writer = self._make_writer()
        try:
            while self.pretrain_iter < self.variant["max_pretrain_iters"]:
                # in every iteration, prepare the data loader
                dataloader = self._make_dataloader(
                    self.offline_trajs,
                    self.variant["num_updates_per_pretrain_iter"],
                )

                train_outputs = trainer.train_iteration(
                    loss_fn=loss_fn,
                    dataloader=dataloader,
                )
                eval_outputs, _ = self.evaluate(eval_fns)
                outputs = {"time/total": time.time() - self.start_time}
                outputs.update(train_outputs)
                outputs.update(eval_outputs)
                self.logger.log_metrics(
                    outputs,
                    iter_num=self.pretrain_iter,
                    total_transitions_sampled=self.total_transitions_sampled,
                    writer=writer,
                )

                self._save_model(
                    path_prefix=self.logger.log_path,
                    is_pretrain_model=True,
                )

                self.pretrain_iter += 1
        finally:
            # Flush and release the TensorBoard event file even if training fails.
            if writer is not None:
                writer.close()

    def evaluate(self, eval_fns):
        """Run every evaluation callable on the model and merge their metrics.

        The model is switched to eval mode first and is not switched back here.

        Args:
            eval_fns: iterable of callables taking the model and returning a dict.

        Returns:
            tuple: ``(outputs, eval_reward)`` where ``outputs`` is the merged
            metric dict plus "time/evaluation", and ``eval_reward`` is
            ``outputs["evaluation/return_mean_gm"]``.

        Raises:
            KeyError: if no callable reported "evaluation/return_mean_gm".
        """
        eval_start = time.time()
        self.model.eval()
        outputs = {}
        for eval_fn in eval_fns:
            outputs.update(eval_fn(self.model))
        outputs["time/evaluation"] = time.time() - eval_start

        eval_reward = outputs["evaluation/return_mean_gm"]
        return outputs, eval_reward

    def online_tuning(self, online_envs, eval_envs, loss_fn):
        """Alternate online rollouts and training for "max_online_iters" iterations.

        Each iteration collects new trajectories into the replay buffer, trains on
        the buffer, evaluates every "eval_interval" iterations (and on the last
        one), logs the metrics and saves a checkpoint.

        Args:
            online_envs: vectorised environment used to collect new trajectories.
            eval_envs: vectorised environment used for evaluation.
            loss_fn: callable passed to ``SequenceTrainer.train_iteration``.
        """
        print("\n\n\n*** Online Finetuning ***")

        trainer = self._make_trainer()
        eval_fns = self._make_eval_fns(eval_envs)
        writer = self._make_writer()
        try:
            while self.online_iter < self.variant["max_online_iters"]:

                outputs = {}
                augment_outputs = self._augment_trajectories(
                    online_envs,
                    self.variant["online_rtg"],
                    n=self.variant["num_online_rollouts"],
                )
                outputs.update(augment_outputs)

                dataloader = self._make_dataloader(
                    self.replay_buffer.trajectories,
                    self.variant["num_updates_per_online_iter"],
                )

                # finetuning
                is_last_iter = self.online_iter == self.variant["max_online_iters"] - 1
                evaluation = (
                    (self.online_iter + 1) % self.variant["eval_interval"] == 0
                    or is_last_iter
                )

                train_outputs = trainer.train_iteration(
                    loss_fn=loss_fn,
                    dataloader=dataloader,
                )
                outputs.update(train_outputs)

                if evaluation:
                    eval_outputs, _ = self.evaluate(eval_fns)
                    outputs.update(eval_outputs)

                outputs["time/total"] = time.time() - self.start_time

                # log the metrics
                self.logger.log_metrics(
                    outputs,
                    iter_num=self.pretrain_iter + self.online_iter,
                    total_transitions_sampled=self.total_transitions_sampled,
                    writer=writer,
                )

                self._save_model(
                    path_prefix=self.logger.log_path,
                    is_pretrain_model=False,
                )

                self.online_iter += 1
        finally:
            # Flush and release the TensorBoard event file even if training fails.
            if writer is not None:
                writer.close()

    def __call__(self):
        """Run the complete experiment: pretraining, then online finetuning.

        Seeds the RNGs from ``variant["seed"]``, builds "num_eval_episodes"
        evaluation environments (seeds 0..N-1) and, if "max_online_iters" is
        non-zero, "num_online_rollouts" online environments (seeds 100..100+M-1).
        All environments are closed on exit, including when training raises.
        """
        utils.set_seed_everywhere(self.variant["seed"])

        import d4rl

        # Environment configuration captured by the env factories below. A local is
        # used (not `self`) so the factories handed to the worker processes do not
        # drag the whole experiment, including the model, along with them.
        env_args = argparse.Namespace(**self.variant)

        def loss_fn(
            a_hat_dist,     # action_preds
            a,              # action_target
            attention_mask, # padding_mask
            entropy_reg,    # self.model.temperature().detach()
        ):
            """Return ``(loss, nll, entropy)`` for a batch of predicted action distributions.

            The log-likelihood is averaged over positions where
            ``attention_mask > 0``; the entropy is averaged over all positions.
            """
            # a_hat is a SquashedNormal Distribution
            log_likelihood = a_hat_dist.log_likelihood(a)[attention_mask > 0].mean()

            entropy = a_hat_dist.entropy().mean()
            loss = -(log_likelihood + entropy_reg * entropy)

            return (
                loss,
                -log_likelihood,
                entropy,
            )

        def get_env_builder(seed, env_name, target_goal=None):
            """Return a zero-argument factory that builds and seeds one environment."""

            def make_env_fn():
                import d4rl

                # Previously `gym.make(env_name)`; the project-specific factory is
                # used instead, so `env_name` is not consulted here.
                env = make_pytorch_env(env_args)
                env.seed(seed)

                if target_goal is not None:
                    env.set_target_goal(target_goal)
                    print(f"Set the target goal to be {env.target_goal}")
                return env

            return make_env_fn

        print("\n\nMaking Eval Env.....")
        env_name = self.variant["env"]
        if "antmaze" in env_name:
            # Sample one goal up front so every worker environment shares it.
            env = gym.make(env_name)
            target_goal = env.target_goal
            env.close()
            print(f"Generated the fixed target goal: {target_goal}")
        else:
            target_goal = None
        eval_envs = SubprocVecEnv(
            [
                get_env_builder(i, env_name=env_name, target_goal=target_goal)
                for i in range(self.variant["num_eval_episodes"])
            ]
        )

        online_envs = None
        try:
            self.start_time = time.time()
            if self.variant["max_pretrain_iters"]:
                self.pretrain(eval_envs, loss_fn)

            if self.variant["max_online_iters"]:
                print("\n\nMaking Online Env.....")
                online_envs = SubprocVecEnv(
                    [
                        get_env_builder(i + 100, env_name=env_name, target_goal=target_goal)
                        for i in range(self.variant["num_online_rollouts"])
                    ]
                )
                self.online_tuning(online_envs, eval_envs, loss_fn)
        finally:
            # Shut the environments down even when training fails, so no worker
            # processes are left behind.
            if online_envs is not None:
                online_envs.close()
            eval_envs.close()

In [8]:
# Seed every RNG before the experiment object is built, since construction
# already initialises the model weights.
utils.set_seed_everywhere(args.seed)

# The experiment takes its configuration as a plain dict of the parsed arguments.
experiment = Experiment(variant=vars(args))

print("=" * 50)
# Calling the instance runs pretraining and then online finetuning.
experiment()

  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")


action_range: [-0.999999, 0.999999]
Starting new experiment: drone_dataset
1544 trajectories, 3497627 timesteps found
Average return: -29.00, std: 2362.99
Max return: 3362.87, min: -5541.95
Average length: 2265.30, std: 1012.84
Max length: 4001.00, min: 919.00
Experiment log path: ./exp/2023.03.20/222713-default


Making Eval Env.....


pybullet build time: May 20 2022 19:44:17





*** Pretrain ***
----------------
eval_envs: <stable_baselines3.common.vec_env.subproc_vec_env.SubprocVecEnv object at 0x7fcf9ff14e80>
loss_fn: <function Experiment.__call__.<locals>.loss_fn at 0x7fcf9fe4daf0>


pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17
pybullet build time: May 20 2022 19:44:17


Iteration 0
time/total: 938.615795135498
time/training: 875.0577943325043
training/train_loss_mean: 1096.3028951166154
training/train_loss_std: 3881.1370204623636
training/nll: -2.6552021503448486
training/entropy: -3.303985118865967
training/temp_value: 0.10309769458572161
evaluation/return_mean_gm: 76.60181316786162
evaluation/return_std_gm: 114.52507420524498
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.543792724609375

Model saved at ./exp/2023.03.20/222713-default/model.pt
Model saved at ./exp/2023.03.20/222713-default/pretrain_model.pt


Making Online Env.....


pybullet build time: May 20 2022 19:44:17





*** Online Finetuning ***
Iteration 1
aug_traj/return: -47913.26283873773
aug_traj/length: 4000.0
time/training: 52.833747148513794
training/train_loss_mean: 0.4556875220896309
training/train_loss_std: 16.428846398700784
training/nll: -2.870779037475586
training/entropy: -3.5203208923339844
training/temp_value: 0.1063037348704993
time/total: 1006.2241275310516

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 2
aug_traj/return: -47841.65691087387
aug_traj/length: 4000.0
time/training: 52.881046533584595
training/train_loss_mean: -2.24659932437544
training/train_loss_std: 1.0916595189097724
training/nll: -2.6275408267974854
training/entropy: -3.6736302375793457
training/temp_value: 0.1089081764498145
time/total: 1070.892729997635

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 3
aug_traj/return: -47851.29184491655
aug_traj/length: 4000.0
time/training: 52.73223805427551
training/train_loss_mean: -2.5893069793788612
training/train_loss_std: 0.303674


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 18
aug_traj/return: -47533.08075532943
aug_traj/length: 4000.0
time/training: 52.67662477493286
training/train_loss_mean: -3.8728482255514574
training/train_loss_std: 0.3405825920841614
training/nll: -5.021806716918945
training/entropy: -4.564475059509277
training/temp_value: 0.18662870766281967
time/total: 2169.574431180954

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 19
aug_traj/return: -47960.59923531774
aug_traj/length: 4000.0
time/training: 52.76882839202881
training/train_loss_mean: -3.935284792567732
training/train_loss_std: 0.32838914373803224
training/nll: -5.091363430023193
training/entropy: -4.44948673248291
training/temp_value: 0.19345239632568634
time/total: 2234.1636781692505

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 20
aug_traj/return: -47908.956004291635
aug_traj/length: 4000.0
time/training: 52.86036539077759
training/train_loss_mean: -3.985232434454948
tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 35
aug_traj/return: -37747.814848467664
aug_traj/length: 4000.0
time/training: 52.772868394851685
training/train_loss_mean: -4.27360983441597
training/train_loss_std: 0.34605036521470844
training/nll: -6.025653839111328
training/entropy: -4.455297946929932
training/temp_value: 0.3342921852612716
time/total: 3396.795501470566

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 36
aug_traj/return: -29156.226470154535
aug_traj/length: 4000.0
time/training: 52.80715847015381
training/train_loss_mean: -4.281266978643652
training/train_loss_std: 0.36456046985495066
training/nll: -5.635491847991943
training/entropy: -4.457695007324219
training/temp_value: 0.3455044618451939
time/total: 3461.433414697647

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 37
aug_traj/return: -42435.231277336316
aug_traj/length: 4000.0
time/training: 52.7632110118866
training/train_loss_mean: -4.268922505690748
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 52
aug_traj/return: -41486.7439049719
aug_traj/length: 4000.0
time/training: 52.76397442817688
training/train_loss_mean: -3.822303001780553
training/train_loss_std: 0.32593993598181076
training/nll: -6.2714385986328125
training/entropy: -4.481863498687744
training/temp_value: 0.5800659073411372
time/total: 4624.210898399353

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 53
aug_traj/return: -38297.1708848042
aug_traj/length: 4000.0
time/training: 52.87213182449341
training/train_loss_mean: -3.7848417157401406
training/train_loss_std: 0.3612842044777128
training/nll: -7.423919200897217
training/entropy: -4.838709831237793
training/temp_value: 0.5984373584841122
time/total: 4688.940630912781

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 54
aug_traj/return: -42249.24369130335
aug_traj/length: 4000.0
time/training: 52.78229522705078
training/train_loss_mean: -3.721182260795393
trainin


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 69
aug_traj/return: -16996.762735103293
aug_traj/length: 4000.0
time/training: 52.91413903236389
training/train_loss_mean: -2.7744693647359546
training/train_loss_std: 0.32371528565662744
training/nll: -6.087650299072266
training/entropy: -3.3878233432769775
training/temp_value: 0.9525555141562488
time/total: 5787.951381921768

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 70
aug_traj/return: -34770.97285374873
aug_traj/length: 4000.0
time/training: 52.784480571746826
training/train_loss_mean: -2.7740135963440737
training/train_loss_std: 0.2890457524639863
training/nll: -5.612180233001709
training/entropy: -3.8732800483703613
training/temp_value: 0.9732969109207716
evaluation/return_mean_gm: 113.90812024961215
evaluation/return_std_gm: 21.382900399267562
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.75489044189453
time/total: 5916.369432687759

Model saved at ./e


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 86
aug_traj/return: -47698.77736255322
aug_traj/length: 4000.0
time/training: 52.83928418159485
training/train_loss_mean: -2.8197715128754384
training/train_loss_std: 0.32685346184459985
training/nll: -6.091694355010986
training/entropy: -2.956421375274658
training/temp_value: 1.0276517867018873
time/total: 7016.944418668747

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 87
aug_traj/return: -37877.78551183613
aug_traj/length: 4000.0
time/training: 52.96936821937561
training/train_loss_mean: -2.8305238200042036
training/train_loss_std: 0.30530887721469097
training/nll: -5.872847080230713
training/entropy: -3.1006572246551514
training/temp_value: 1.0296916906594564
time/total: 7081.7828373909

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 88
aug_traj/return: -31468.805833241317
aug_traj/length: 4000.0
time/training: 52.93302893638611
training/train_loss_mean: -2.8393442094170305
tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 103
aug_traj/return: -8275.086508445507
aug_traj/length: 4000.0
time/training: 52.77609658241272
training/train_loss_mean: -2.9058542215135454
training/train_loss_std: 0.2947978377379497
training/nll: -6.769189357757568
training/entropy: -3.6416375637054443
training/temp_value: 1.0329781550202979
time/total: 8246.383145570755

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 104
aug_traj/return: -5678.277740495604
aug_traj/length: 4000.0
time/training: 52.849127769470215
training/train_loss_mean: -2.8836710890459534
training/train_loss_std: 0.3052780182031745
training/nll: -5.426985740661621
training/entropy: -2.657968759536743
training/temp_value: 1.0332777262966617
time/total: 8311.40023446083

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 105
aug_traj/return: -6076.22378713888
aug_traj/length: 4000.0
time/training: 52.78228735923767
training/train_loss_mean: -2.8833684099545573
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 120
aug_traj/return: -7505.794285442487
aug_traj/length: 4000.0
time/training: 52.8665497303009
training/train_loss_mean: -2.88038778899517
training/train_loss_std: 0.32427178631954645
training/nll: -6.182093620300293
training/entropy: -3.1340084075927734
training/temp_value: 1.0332692226409679
evaluation/return_mean_gm: -8644.712391790865
evaluation/return_std_gm: 191.25413430571834
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.698776721954346
time/total: 9477.04295039177

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 121
aug_traj/return: -5738.223689317854
aug_traj/length: 4000.0
time/training: 53.023582458496094
training/train_loss_mean: -2.914719854601656
training/train_loss_std: 0.32175433465931896
training/nll: -6.071325778961182
training/entropy: -2.892476797103882
training/temp_value: 1.0352579212531563
time/total: 9542.5167927742

Model saved at ./exp/20


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 137
aug_traj/return: -5879.469883957996
aug_traj/length: 4000.0
time/training: 52.87571310997009
training/train_loss_mean: -2.8881650107984287
training/train_loss_std: 0.35243831747744186
training/nll: -6.7438788414001465
training/entropy: -3.320664644241333
training/temp_value: 1.037200207178938
time/total: 10644.299298286438

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 138
aug_traj/return: -5403.051414413482
aug_traj/length: 4000.0
time/training: 52.98192834854126
training/train_loss_mean: -2.878361331399918
training/train_loss_std: 0.31528450597635216
training/nll: -5.6938395500183105
training/entropy: -2.312518835067749
training/temp_value: 1.0394656539281462
time/total: 10709.194578647614

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 139
aug_traj/return: -5008.996597576687
aug_traj/length: 4000.0
time/training: 52.891390562057495
training/train_loss_mean: -2.89309914752728


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 154
aug_traj/return: -5605.282119047981
aug_traj/length: 4000.0
time/training: 53.03956699371338
training/train_loss_mean: -2.8189531977474926
training/train_loss_std: 0.3080023047054139
training/nll: -6.3653669357299805
training/entropy: -3.050523042678833
training/temp_value: 1.0337729592607703
time/total: 11875.226544380188

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 155
aug_traj/return: -9790.03457794252
aug_traj/length: 4000.0
time/training: 52.95003390312195
training/train_loss_mean: -2.8401912479719926
training/train_loss_std: 0.351372468334085
training/nll: -6.441105365753174
training/entropy: -3.166644334793091
training/temp_value: 1.0333077592295359
time/total: 11940.137031555176

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 156
aug_traj/return: -6963.825799843988
aug_traj/length: 4000.0
time/training: 53.03864860534668
training/train_loss_mean: -2.8032601562620374
t


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 171
aug_traj/return: -11134.690173740564
aug_traj/length: 4000.0
time/training: 52.86877393722534
training/train_loss_mean: -2.7626788948188756
training/train_loss_std: 0.3555900047806168
training/nll: -5.009807586669922
training/entropy: -2.5792627334594727
training/temp_value: 1.0354523612781237
time/total: 13107.702182531357

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 172
aug_traj/return: -5234.912203140637
aug_traj/length: 4000.0
time/training: 53.03924894332886
training/train_loss_mean: -2.7815758981474628
training/train_loss_std: 0.31697432362047145
training/nll: -5.883021354675293
training/entropy: -2.7780425548553467
training/temp_value: 1.0364101112952877
time/total: 13172.440287590027

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 173
aug_traj/return: -7690.583047504943
aug_traj/length: 4000.0
time/training: 52.99782586097717
training/train_loss_mean: -2.7502369784362


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 188
aug_traj/return: -9001.252049372153
aug_traj/length: 4000.0
time/training: 52.93061709403992
training/train_loss_mean: -2.6926553579517027
training/train_loss_std: 0.43030385107127295
training/nll: -6.4944353103637695
training/entropy: -3.4517605304718018
training/temp_value: 1.032252707208967
time/total: 14275.494014978409

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 189
aug_traj/return: -8104.965141779823
aug_traj/length: 4000.0
time/training: 53.09495997428894
training/train_loss_mean: -2.728088481443819
training/train_loss_std: 0.3539946234526457
training/nll: -4.579775810241699
training/entropy: -2.1048550605773926
training/temp_value: 1.033128894989147
time/total: 14340.832012414932

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 190
aug_traj/return: -7102.350875590951
aug_traj/length: 4000.0
time/training: 52.87883996963501
training/train_loss_mean: -2.692547342700342



Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 205
aug_traj/return: -7581.831393957666
aug_traj/length: 4000.0
time/training: 52.90880727767944
training/train_loss_mean: -2.6414709383918753
training/train_loss_std: 0.337830920248991
training/nll: -5.788905143737793
training/entropy: -3.2441906929016113
training/temp_value: 1.02790609994235
time/total: 15509.019654750824

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 206
aug_traj/return: -8957.611798906579
aug_traj/length: 4000.0
time/training: 53.00778651237488
training/train_loss_mean: -2.6811053809552345
training/train_loss_std: 0.32363065506577676
training/nll: -5.924252033233643
training/entropy: -3.3982746601104736
training/temp_value: 1.0289171223453388
time/total: 15573.675294160843

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 207
aug_traj/return: -8794.883174090739
aug_traj/length: 4000.0
time/training: 52.991735219955444
training/train_loss_mean: -2.6771509274966965


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 222
aug_traj/return: -10261.554319931696
aug_traj/length: 4000.0
time/training: 53.32621097564697
training/train_loss_mean: -2.631526239047848
training/train_loss_std: 0.32354498023850703
training/nll: -5.749794006347656
training/entropy: -3.0475738048553467
training/temp_value: 1.0309211098887887
time/total: 16742.36449289322

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 223
aug_traj/return: -8252.089921302968
aug_traj/length: 4000.0
time/training: 52.98282074928284
training/train_loss_mean: -2.596903030386162
training/train_loss_std: 0.34152141450684176
training/nll: -5.531474590301514
training/entropy: -3.212693691253662
training/temp_value: 1.0309663551544257
time/total: 16807.129688501358

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 224
aug_traj/return: -6462.2073279256165
aug_traj/length: 4000.0
time/training: 52.83396100997925
training/train_loss_mean: -2.597403048440191


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 239
aug_traj/return: -8410.611333583847
aug_traj/length: 4000.0
time/training: 52.96834397315979
training/train_loss_mean: -2.5605349462270595
training/train_loss_std: 0.3416122787043185
training/nll: -5.184113502502441
training/entropy: -2.7399022579193115
training/temp_value: 1.025363081562412
time/total: 17910.000996351242

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 240
aug_traj/return: -8038.736460388546
aug_traj/length: 4000.0
time/training: 53.07616996765137
training/train_loss_mean: -2.5635126172006633
training/train_loss_std: 0.334125488043787
training/nll: -5.22213888168335
training/entropy: -2.8034253120422363
training/temp_value: 1.0251788324151792
evaluation/return_mean_gm: -3890.4017174086416
evaluation/return_std_gm: 4903.618380978252
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.79551601409912
time/total: 18038.906710863113

Model saved at ./exp


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 256
aug_traj/return: -9157.42819728461
aug_traj/length: 4000.0
time/training: 52.858296155929565
training/train_loss_mean: -2.5129659861930245
training/train_loss_std: 0.325371292615417
training/nll: -6.415258884429932
training/entropy: -3.327091932296753
training/temp_value: 1.0240380618410139
time/total: 19145.00848555565

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 257
aug_traj/return: -8179.834622233476
aug_traj/length: 4000.0
time/training: 53.02097129821777
training/train_loss_mean: -2.49063963244853
training/train_loss_std: 0.34967915021038554
training/nll: -5.354522705078125
training/entropy: -2.9281702041625977
training/temp_value: 1.0216369106855232
time/total: 19209.96609377861

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 258
aug_traj/return: -7498.018139416886
aug_traj/length: 4000.0
time/training: 52.885278940200806
training/train_loss_mean: -2.5130184774556077
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 273
aug_traj/return: -9106.514997303026
aug_traj/length: 4000.0
time/training: 53.03505516052246
training/train_loss_mean: -2.477166835704278
training/train_loss_std: 0.35054879256138
training/nll: -5.377047538757324
training/entropy: -3.136397123336792
training/temp_value: 1.023191527026305
time/total: 20380.243970394135

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 274
aug_traj/return: -9347.206146392407
aug_traj/length: 4000.0
time/training: 52.904435873031616
training/train_loss_mean: -2.462374715625487
training/train_loss_std: 0.406767028481865
training/nll: -6.554502964019775
training/entropy: -2.7852871417999268
training/temp_value: 1.0239441260763094
time/total: 20445.258104085922

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 275
aug_traj/return: -10525.757533857517
aug_traj/length: 4000.0
time/training: 52.912110567092896
training/train_loss_mean: -2.4206861001405584
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 290
aug_traj/return: -8133.711578980095
aug_traj/length: 4000.0
time/training: 53.00549292564392
training/train_loss_mean: -2.39403145442119
training/train_loss_std: 0.30478738509999836
training/nll: -6.075671672821045
training/entropy: -3.6699414253234863
training/temp_value: 1.0198286509915364
evaluation/return_mean_gm: -465.97229019844417
evaluation/return_std_gm: 75.69987687646382
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.73283338546753
time/total: 21612.96482872963

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 291
aug_traj/return: -7858.67524990072
aug_traj/length: 4000.0
time/training: 53.0259952545166
training/train_loss_mean: -2.394801556316902
training/train_loss_std: 0.3204258430805946
training/nll: -5.802738189697266
training/entropy: -3.1431448459625244
training/temp_value: 1.0173679037879055
time/total: 21677.784415483475

Model saved at ./exp/2


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 307
aug_traj/return: -8891.559964254178
aug_traj/length: 4000.0
time/training: 53.14297270774841
training/train_loss_mean: -2.349637662571389
training/train_loss_std: 0.29265909689132324
training/nll: -5.501786708831787
training/entropy: -3.0680148601531982
training/temp_value: 1.015944501611311
time/total: 22784.704658269882

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 308
aug_traj/return: -7948.94210573708
aug_traj/length: 4000.0
time/training: 53.01218104362488
training/train_loss_mean: -2.3222167924339114
training/train_loss_std: 0.34080295202577054
training/nll: -3.8481621742248535
training/entropy: -1.819202184677124
training/temp_value: 1.013829829571027
time/total: 22849.652314901352

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 309
aug_traj/return: -7113.313450101816
aug_traj/length: 4000.0
time/training: 52.95122051239014
training/train_loss_mean: -2.362361049204753
t


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 324
aug_traj/return: -6653.892496265896
aug_traj/length: 4000.0
time/training: 52.976927757263184
training/train_loss_mean: -2.2907271602010244
training/train_loss_std: 0.36542883410482285
training/nll: -6.135098934173584
training/entropy: -3.690521478652954
training/temp_value: 1.0099133029157248
time/total: 24018.808217287064

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 325
aug_traj/return: -6882.932923515217
aug_traj/length: 4000.0
time/training: 53.15730619430542
training/train_loss_mean: -2.3171695736342484
training/train_loss_std: 0.321763542053268
training/nll: -4.4983367919921875
training/entropy: -2.3802051544189453
training/temp_value: 1.0106427732304963
time/total: 24083.775640010834

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 326
aug_traj/return: -7152.176722852117
aug_traj/length: 4000.0
time/training: 52.9956910610199
training/train_loss_mean: -2.250104634027984


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 341
aug_traj/return: -6953.637152980266
aug_traj/length: 4000.0
time/training: 53.06676244735718
training/train_loss_mean: -2.267588469270396
training/train_loss_std: 0.4594710902266416
training/nll: -6.2096405029296875
training/entropy: -3.695206880569458
training/temp_value: 1.003845293539242
time/total: 25251.537494659424

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 342
aug_traj/return: -7436.172127270672
aug_traj/length: 4000.0
time/training: 53.16140270233154
training/train_loss_mean: -2.229255627164403
training/train_loss_std: 0.3127896878770337
training/nll: -5.779402732849121
training/entropy: -3.1081340312957764
training/temp_value: 0.9996798756698342
time/total: 25316.76926088333

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 343
aug_traj/return: -6947.852074300865
aug_traj/length: 4000.0
time/training: 53.190940618515015
training/train_loss_mean: -2.255093447237914
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 358
aug_traj/return: -6161.391495027188
aug_traj/length: 4000.0
time/training: 53.04134559631348
training/train_loss_mean: -2.2407558897861
training/train_loss_std: 0.33085037807381373
training/nll: -5.84437370300293
training/entropy: -2.995811700820923
training/temp_value: 0.9981470933662929
time/total: 26423.502264261246

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 359
aug_traj/return: -9663.447126966881
aug_traj/length: 4000.0
time/training: 52.95397067070007
training/train_loss_mean: -2.1477590305368683
training/train_loss_std: 1.120956879845306
training/nll: -5.956376552581787
training/entropy: -3.419840097427368
training/temp_value: 0.9952065342644018
time/total: 26488.356548070908

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 360
aug_traj/return: -7166.487343277988
aug_traj/length: 4000.0
time/training: 52.94953274726868
training/train_loss_mean: -2.264408747431242
train


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 375
aug_traj/return: -5447.268513061053
aug_traj/length: 4000.0
time/training: 53.174970865249634
training/train_loss_mean: -2.205727830489717
training/train_loss_std: 0.3363820548602038
training/nll: -4.652987957000732
training/entropy: -2.7114920616149902
training/temp_value: 0.9957962156478631
time/total: 27657.340827703476

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 376
aug_traj/return: -7657.577275380611
aug_traj/length: 4000.0
time/training: 53.004875898361206
training/train_loss_mean: -2.17859250257877
training/train_loss_std: 0.32673592834548726
training/nll: -6.66756534576416
training/entropy: -3.8317208290100098
training/temp_value: 0.9940761210530186
time/total: 27722.26443696022

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 377
aug_traj/return: -8747.124010755744
aug_traj/length: 4000.0
time/training: 53.03960728645325
training/train_loss_mean: -2.193499984906475
t


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 392
aug_traj/return: -8814.157324674909
aug_traj/length: 4000.0
time/training: 53.10065531730652
training/train_loss_mean: -2.0623519641120804
training/train_loss_std: 1.17840220411424
training/nll: -5.393714427947998
training/entropy: -3.2157206535339355
training/temp_value: 0.9803556767408217
time/total: 28891.486983537674

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 393
aug_traj/return: -6139.460144480114
aug_traj/length: 4000.0
time/training: 53.01301956176758
training/train_loss_mean: -2.127429450885925
training/train_loss_std: 0.32760882367707567
training/nll: -5.140430450439453
training/entropy: -3.1751601696014404
training/temp_value: 0.9759464737825373
time/total: 28956.464958906174

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 394
aug_traj/return: -7004.165593273052
aug_traj/length: 4000.0
time/training: 53.097630977630615
training/train_loss_mean: -2.1378457126590544


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 409
aug_traj/return: -7254.102828893594
aug_traj/length: 4000.0
time/training: 53.043954849243164
training/train_loss_mean: -2.147023698215768
training/train_loss_std: 0.35469227727239144
training/nll: -5.925656318664551
training/entropy: -3.488471031188965
training/temp_value: 0.9707164355698025
time/total: 30063.840349435806

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 410
aug_traj/return: -8269.466623046948
aug_traj/length: 4000.0
time/training: 53.04191184043884
training/train_loss_mean: -2.1590227159041326
training/train_loss_std: 0.3493620828374727
training/nll: -4.868659496307373
training/entropy: -2.9504916667938232
training/temp_value: 0.9720900541275963
evaluation/return_mean_gm: -303.2659790655438
evaluation/return_std_gm: 96.1313426919607
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.605119466781616
time/total: 30192.64823293686

Model saved at ./ex


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 426
aug_traj/return: -7705.916054998877
aug_traj/length: 4000.0
time/training: 53.071168422698975
training/train_loss_mean: -2.1125330752758202
training/train_loss_std: 0.34479583300512895
training/nll: -3.748488426208496
training/entropy: -2.260636568069458
training/temp_value: 0.9616695539182172
time/total: 31296.33061027527

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 427
aug_traj/return: -8843.705263549424
aug_traj/length: 4000.0
time/training: 52.96794319152832
training/train_loss_mean: -2.096841318524937
training/train_loss_std: 0.5011586831958041
training/nll: -3.2921977043151855
training/entropy: -2.483236074447632
training/temp_value: 0.9612522810250187
time/total: 31361.249281167984

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 428
aug_traj/return: -7104.984176354337
aug_traj/length: 4000.0
time/training: 53.246946573257446
training/train_loss_mean: -2.128806889474097


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 443
aug_traj/return: -4912.691158993442
aug_traj/length: 4000.0
time/training: 53.11039972305298
training/train_loss_mean: -2.021914169602566
training/train_loss_std: 1.2327853075805573
training/nll: -5.210452079772949
training/entropy: -3.4000964164733887
training/temp_value: 0.9466674390559287
time/total: 32529.539102077484

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 444
aug_traj/return: -8738.408615733828
aug_traj/length: 4000.0
time/training: 53.022937297821045
training/train_loss_mean: -2.0832836030593787
training/train_loss_std: 0.3703253604457099
training/nll: -3.9543051719665527
training/entropy: -2.1929869651794434
training/temp_value: 0.9461078463165598
time/total: 32594.33730816841

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 445
aug_traj/return: -5389.470035243736
aug_traj/length: 4000.0
time/training: 52.94055199623108
training/train_loss_mean: -2.061204125039815


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 460
aug_traj/return: -8721.548473433
aug_traj/length: 4000.0
time/training: 53.16066884994507
training/train_loss_mean: -2.057464330601363
training/train_loss_std: 0.3231075334405943
training/nll: -4.792158126831055
training/entropy: -2.7830264568328857
training/temp_value: 0.9384088551697072
evaluation/return_mean_gm: -130.70675694049032
evaluation/return_std_gm: 75.98104369633168
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.67361497879028
time/total: 33762.377892017365

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 461
aug_traj/return: -8018.379374274072
aug_traj/length: 4000.0
time/training: 53.03432106971741
training/train_loss_mean: -2.041301427909015
training/train_loss_std: 0.7335465001476832
training/nll: -5.285221576690674
training/entropy: -2.878634214401245
training/temp_value: 0.939924935424406
time/total: 33827.302178144455

Model saved at ./exp/202


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 477
aug_traj/return: -5332.622454531282
aug_traj/length: 4000.0
time/training: 53.014986515045166
training/train_loss_mean: -1.6712416350797372
training/train_loss_std: 4.5617747156115
training/nll: -5.034152507781982
training/entropy: -3.3179380893707275
training/temp_value: 0.9239741441872213
time/total: 34931.41345024109

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 478
aug_traj/return: -4931.780654119315
aug_traj/length: 4000.0
time/training: 53.06175923347473
training/train_loss_mean: -1.983378304468113
training/train_loss_std: 0.9905313568396281
training/nll: -5.459045886993408
training/entropy: -3.4707915782928467
training/temp_value: 0.9234123356873336
time/total: 34996.36376953125

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 479
aug_traj/return: -4950.45223259694
aug_traj/length: 4000.0
time/training: 53.068952798843384
training/train_loss_mean: -1.899335360719795
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 494
aug_traj/return: -7112.007607796694
aug_traj/length: 4000.0
time/training: 53.156997203826904
training/train_loss_mean: -2.0235618446990573
training/train_loss_std: 0.6025874836970778
training/nll: -4.864315032958984
training/entropy: -3.107664108276367
training/temp_value: 0.9159314535406462
time/total: 36166.953583717346

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 495
aug_traj/return: -7811.70800379391
aug_traj/length: 4000.0
time/training: 53.19178247451782
training/train_loss_mean: -1.8363510860964998
training/train_loss_std: 2.734035278596823
training/nll: -5.1299848556518555
training/entropy: -3.472615957260132
training/temp_value: 0.9141408446172873
time/total: 36232.16168498993

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 496
aug_traj/return: -7471.42268530188
aug_traj/length: 4000.0
time/training: 53.031747579574585
training/train_loss_mean: -2.0195251006236905
t


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 511
aug_traj/return: -5260.011427732264
aug_traj/length: 4000.0
time/training: 53.040656089782715
training/train_loss_mean: -1.9894628097836105
training/train_loss_std: 0.6521575159032851
training/nll: -5.037650108337402
training/entropy: -3.204423189163208
training/temp_value: 0.9073643311849443
time/total: 37400.417219638824

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 512
aug_traj/return: -6985.176681688047
aug_traj/length: 4000.0
time/training: 53.13563919067383
training/train_loss_mean: -1.9823907496758473
training/train_loss_std: 0.44755057001725584
training/nll: -6.173192977905273
training/entropy: -3.973560094833374
training/temp_value: 0.9050040390648001
time/total: 37465.715715408325

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 513
aug_traj/return: -5969.277555896029
aug_traj/length: 4000.0
time/training: 52.92818284034729
training/train_loss_mean: -2.030929621042842


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 528
aug_traj/return: -5554.020685443983
aug_traj/length: 4000.0
time/training: 52.97265386581421
training/train_loss_mean: -2.015785930251647
training/train_loss_std: 0.40220167928267436
training/nll: -4.335040092468262
training/entropy: -2.7009196281433105
training/temp_value: 0.8961429640862558
time/total: 38568.787317276

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 529
aug_traj/return: -5304.4790560894
aug_traj/length: 4000.0
time/training: 53.13551664352417
training/train_loss_mean: -1.9575871269806013
training/train_loss_std: 0.8843765859189566
training/nll: -4.687799453735352
training/entropy: -3.330662488937378
training/temp_value: 0.8940028983945277
time/total: 38634.01187610626

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 530
aug_traj/return: -7336.5256144900395
aug_traj/length: 4000.0
time/training: 52.83968162536621
training/train_loss_mean: -1.6783865382368643
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 545
aug_traj/return: -6042.103583642168
aug_traj/length: 4000.0
time/training: 53.08287835121155
training/train_loss_mean: -1.8451131456607153
training/train_loss_std: 1.6236938278158568
training/nll: -4.4481964111328125
training/entropy: -2.73991322517395
training/temp_value: 0.8876842844806226
time/total: 39800.74735856056

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 546
aug_traj/return: -6802.933572106284
aug_traj/length: 4000.0
time/training: 52.990453243255615
training/train_loss_mean: -1.9561262469683016
training/train_loss_std: 0.8889523926032735
training/nll: -4.202238082885742
training/entropy: -2.6696178913116455
training/temp_value: 0.8862165718382946
time/total: 39865.919873952866

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 547
aug_traj/return: -4800.874795407383
aug_traj/length: 4000.0
time/training: 52.60162615776062
training/train_loss_mean: -1.978212190602262



Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 562
aug_traj/return: -4731.9172518779005
aug_traj/length: 4000.0
time/training: 52.880908489227295
training/train_loss_mean: -1.4733495851618166
training/train_loss_std: 6.9273622812614
training/nll: -5.3035502433776855
training/entropy: -3.5154130458831787
training/temp_value: 0.87745624580432
time/total: 41030.82000994682

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 563
aug_traj/return: -5965.979571040852
aug_traj/length: 4000.0
time/training: 52.92608952522278
training/train_loss_mean: -1.8580239007401194
training/train_loss_std: 1.7939625646657607
training/nll: -6.200613975524902
training/entropy: -4.080485820770264
training/temp_value: 0.8795319417324426
time/total: 41095.84838891029

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 564
aug_traj/return: -5010.502814551504
aug_traj/length: 4000.0
time/training: 53.00977563858032
training/train_loss_mean: -1.4956456754356595
tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 579
aug_traj/return: -9316.884191455847
aug_traj/length: 4000.0
time/training: 53.116947412490845
training/train_loss_mean: -1.7472763779651985
training/train_loss_std: 2.838641817610607
training/nll: -5.483918190002441
training/entropy: -3.6717474460601807
training/temp_value: 0.8693639872056577
time/total: 42201.16020441055

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 580
aug_traj/return: -6114.336101848955
aug_traj/length: 4000.0
time/training: 59.964383125305176
training/train_loss_mean: -1.9656698532421335
training/train_loss_std: 0.4696158524215309
training/nll: -3.855297565460205
training/entropy: -2.587367534637451
training/temp_value: 0.8701059324735837
evaluation/return_mean_gm: -0.00976077971359337
evaluation/return_std_gm: 0.010408269482722369
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 68.5495023727417
time/total: 42341.782041072845

Model saved at 


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 596
aug_traj/return: -3901.3176185736243
aug_traj/length: 4000.0
time/training: 58.265772581100464
training/train_loss_mean: -1.6513932821878126
training/train_loss_std: 2.764897551369901
training/nll: -4.6963725090026855
training/entropy: -3.295849323272705
training/temp_value: 0.8621614595648178
time/total: 43559.97594666481

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 597
aug_traj/return: -6409.684082311129
aug_traj/length: 4000.0
time/training: 58.09752631187439
training/train_loss_mean: -1.8896165920299
training/train_loss_std: 1.216270045634852
training/nll: -5.640977382659912
training/entropy: -3.7624218463897705
training/temp_value: 0.8614863663717265
time/total: 43631.40363931656

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 598
aug_traj/return: -6926.075445394602
aug_traj/length: 4000.0
time/training: 57.67398262023926
training/train_loss_mean: -1.9950743826988484
tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 613
aug_traj/return: -6616.098278493082
aug_traj/length: 4000.0
time/training: 58.45591330528259
training/train_loss_mean: -1.1869928704942732
training/train_loss_std: 7.660245546971743
training/nll: -3.8881757259368896
training/entropy: -2.547442674636841
training/temp_value: 0.856727157892406
time/total: 44924.72356700897

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 614
aug_traj/return: -9339.844544631997
aug_traj/length: 4000.0
time/training: 59.25360417366028
training/train_loss_mean: -1.9197012301116125
training/train_loss_std: 0.6767148439915861
training/nll: -4.2899675369262695
training/entropy: -2.89994740486145
training/temp_value: 0.8576429641731994
time/total: 44997.03026819229

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 615
aug_traj/return: -6838.80976496357
aug_traj/length: 4000.0
time/training: 63.89518713951111
training/train_loss_mean: -1.7671044002332488
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 630
aug_traj/return: -7433.108055504723
aug_traj/length: 4000.0
time/training: 61.17707562446594
training/train_loss_mean: -1.4222992037580184
training/train_loss_std: 5.496614840647363
training/nll: -3.6610641479492188
training/entropy: -2.5847737789154053
training/temp_value: 0.842753982472694
evaluation/return_mean_gm: -11336.090814719031
evaluation/return_std_gm: 327.5264020934107
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 74.50187587738037
time/total: 46388.31763124466

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 631
aug_traj/return: -4452.132736700691
aug_traj/length: 4000.0
time/training: 61.141570806503296
training/train_loss_mean: -1.5961783195002972
training/train_loss_std: 4.294463241148899
training/nll: -3.288738250732422
training/entropy: -1.9117003679275513
training/temp_value: 0.840085561294041
time/total: 46464.3759765625

Model saved at ./exp/2


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 647
aug_traj/return: -6740.425284988338
aug_traj/length: 4000.0
time/training: 53.15804576873779
training/train_loss_mean: -1.3601048732786651
training/train_loss_std: 8.581129932757346
training/nll: -5.612442970275879
training/entropy: -3.930159568786621
training/temp_value: 0.8294636826438673
time/total: 47627.38441538811

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 648
aug_traj/return: -5325.183412754664
aug_traj/length: 4000.0
time/training: 53.08150577545166
training/train_loss_mean: -1.781963927851716
training/train_loss_std: 2.542060289496847
training/nll: -3.952519178390503
training/entropy: -2.6437981128692627
training/temp_value: 0.8302744298658152
time/total: 47692.69004058838

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 649
aug_traj/return: -8384.438255577299
aug_traj/length: 4000.0
time/training: 53.139721393585205
training/train_loss_mean: -1.3934269050745278
tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 664
aug_traj/return: -5304.296756341929
aug_traj/length: 4000.0
time/training: 53.010295152664185
training/train_loss_mean: -1.9611596628979238
training/train_loss_std: 0.4017254833673663
training/nll: -6.101932525634766
training/entropy: -4.143337726593018
training/temp_value: 0.8256705331644368
time/total: 48864.29753947258

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 665
aug_traj/return: -5683.20785208044
aug_traj/length: 4000.0
time/training: 53.34069228172302
training/train_loss_mean: -1.3862794591253862
training/train_loss_std: 5.97097707955382
training/nll: -6.0254364013671875
training/entropy: -4.427865028381348
training/temp_value: 0.8233474759037558
time/total: 48929.789601802826

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 666
aug_traj/return: -5941.144407466401
aug_traj/length: 4000.0
time/training: 53.265023708343506
training/train_loss_mean: -1.6527320530867327
t


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 681
aug_traj/return: -8563.975871717783
aug_traj/length: 4000.0
time/training: 53.18583130836487
training/train_loss_mean: -1.348541686404539
training/train_loss_std: 6.896208791579452
training/nll: -4.595165729522705
training/entropy: -3.339850902557373
training/temp_value: 0.8098849524213931
time/total: 50101.226925849915

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 682
aug_traj/return: -5904.336899603241
aug_traj/length: 4000.0
time/training: 53.17064881324768
training/train_loss_mean: -1.9689175395858096
training/train_loss_std: 0.2908584692135815
training/nll: -3.303722620010376
training/entropy: -2.073221445083618
training/temp_value: 0.809616087442239
time/total: 50167.02051997185

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 683
aug_traj/return: -5923.954542704612
aug_traj/length: 4000.0
time/training: 53.03973841667175
training/train_loss_mean: -1.7457165353915836
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 698
aug_traj/return: -5252.537132273311
aug_traj/length: 4000.0
time/training: 53.194360971450806
training/train_loss_mean: -1.550704393291824
training/train_loss_std: 3.8039466254558243
training/nll: -4.632140636444092
training/entropy: -3.400980234146118
training/temp_value: 0.7987040415542654
time/total: 51273.79546761513

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 699
aug_traj/return: -4582.1259920045295
aug_traj/length: 4000.0
time/training: 53.26751089096069
training/train_loss_mean: -1.5108836258520177
training/train_loss_std: 3.9631586902078344
training/nll: -3.9259510040283203
training/entropy: -2.6724793910980225
training/temp_value: 0.7975461334484661
time/total: 51339.072652339935

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 700
aug_traj/return: -8453.620980158561
aug_traj/length: 4000.0
time/training: 53.05467486381531
training/train_loss_mean: -1.563379647361151


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 715
aug_traj/return: -7061.074898537904
aug_traj/length: 4000.0
time/training: 53.194739818573
training/train_loss_mean: -1.9675184592334187
training/train_loss_std: 0.3275943598288656
training/nll: -2.7595794200897217
training/entropy: -1.8896714448928833
training/temp_value: 0.7918290413550299
time/total: 52512.340819358826

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 716
aug_traj/return: -7118.3057923257065
aug_traj/length: 4000.0
time/training: 53.1400043964386
training/train_loss_mean: -1.5109977687834297
training/train_loss_std: 4.928503984338797
training/nll: -4.712206840515137
training/entropy: -3.348464250564575
training/temp_value: 0.7906269667226045
time/total: 52577.592878341675

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 717
aug_traj/return: -4635.328755722947
aug_traj/length: 4000.0
time/training: 53.235233545303345
training/train_loss_mean: -1.6493916120528407



Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 732
aug_traj/return: -5518.74507274464
aug_traj/length: 4000.0
time/training: 57.60159635543823
training/train_loss_mean: -1.5490197684031057
training/train_loss_std: 4.667732358441628
training/nll: -5.555716037750244
training/entropy: -3.526520252227783
training/temp_value: 0.7835638453378109
time/total: 53792.60398745537

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 733
aug_traj/return: -10987.253195344354
aug_traj/length: 4000.0
time/training: 57.60316586494446
training/train_loss_mean: -1.7546423354645913
training/train_loss_std: 2.7840002497931913
training/nll: -4.558536529541016
training/entropy: -3.226583242416382
training/temp_value: 0.7850804860232948
time/total: 53863.123797893524

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 734
aug_traj/return: -9743.298330424068
aug_traj/length: 4000.0
time/training: 58.16932773590088
training/train_loss_mean: -0.4330699972788638
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 749
aug_traj/return: -15836.781034895079
aug_traj/length: 4000.0
time/training: 58.134087562561035
training/train_loss_mean: -1.1164312831913583
training/train_loss_std: 9.993310017996343
training/nll: -4.755673885345459
training/entropy: -3.3298768997192383
training/temp_value: 0.7780611463726674
time/total: 55063.40661549568

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 750
aug_traj/return: -4870.399449005323
aug_traj/length: 4000.0
time/training: 59.255412340164185
training/train_loss_mean: -0.45653461271987394
training/train_loss_std: 16.180567743179147
training/nll: -3.927556037902832
training/entropy: -2.797524929046631
training/temp_value: 0.7781284893320696
evaluation/return_mean_gm: -10424.768954120918
evaluation/return_std_gm: 1214.1963243360833
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 71.03150200843811
time/total: 55207.567368507385

Model saved at 


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 766
aug_traj/return: -24754.699881619268
aug_traj/length: 4000.0
time/training: 56.67311358451843
training/train_loss_mean: -0.9183071898538767
training/train_loss_std: 10.774922813538709
training/nll: -3.9626872539520264
training/entropy: -2.8396294116973877
training/temp_value: 0.7689519834392109
time/total: 56410.91030240059

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 767
aug_traj/return: -10044.971119338788
aug_traj/length: 4000.0
time/training: 57.53713130950928
training/train_loss_mean: -1.0384612569793876
training/train_loss_std: 10.264716586411211
training/nll: -3.6723461151123047
training/entropy: -2.6659750938415527
training/temp_value: 0.7695892288376454
time/total: 56481.64204502106

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 768
aug_traj/return: -9447.24944700206
aug_traj/length: 4000.0
time/training: 56.735356092453
training/train_loss_mean: -1.5526796083376766


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 783
aug_traj/return: -8174.612963042442
aug_traj/length: 4000.0
time/training: 59.22249674797058
training/train_loss_mean: -1.4961805991370112
training/train_loss_std: 6.3593232433247255
training/nll: -4.4098944664001465
training/entropy: -3.23992919921875
training/temp_value: 0.7629511340753371
time/total: 57756.067501068115

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 784
aug_traj/return: -14090.347835153416
aug_traj/length: 4000.0
time/training: 58.88096570968628
training/train_loss_mean: -1.673654410624992
training/train_loss_std: 3.1418278306144285
training/nll: -4.525963306427002
training/entropy: -2.731736898422241
training/temp_value: 0.7615501791121875
time/total: 57828.30710506439

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 785
aug_traj/return: -8888.879122795164
aug_traj/length: 4000.0
time/training: 57.74012207984924
training/train_loss_mean: -1.032122218249088
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 800
aug_traj/return: -8523.561614832843
aug_traj/length: 4000.0
time/training: 61.004475355148315
training/train_loss_mean: -1.144342929092101
training/train_loss_std: 7.222094268401866
training/nll: -4.473752021789551
training/entropy: -3.2930908203125
training/temp_value: 0.759835299398206
evaluation/return_mean_gm: -4226.550184971087
evaluation/return_std_gm: 3541.641795115058
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 73.2445502281189
time/total: 59109.82110595703

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 801
aug_traj/return: -14886.000773233707
aug_traj/length: 4000.0
time/training: 58.30339336395264
training/train_loss_mean: -1.771429828182984
training/train_loss_std: 1.4967728776758926
training/nll: -4.176055908203125
training/entropy: -3.1095540523529053
training/temp_value: 0.7596360319905202
time/total: 59182.081329107285

Model saved at ./exp/2023


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 817
aug_traj/return: -26341.17935238819
aug_traj/length: 4000.0
time/training: 57.54556488990784
training/train_loss_mean: -1.7976972113801284
training/train_loss_std: 1.7794202737598213
training/nll: -5.441306114196777
training/entropy: -3.989529848098755
training/temp_value: 0.7615271976941433
time/total: 60384.04683160782

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 818
aug_traj/return: -18851.856901585474
aug_traj/length: 4000.0
time/training: 56.994065046310425
training/train_loss_mean: -0.40225825396822945
training/train_loss_std: 11.238520756082167
training/nll: -4.679109573364258
training/entropy: -3.5239880084991455
training/temp_value: 0.7611557842506572
time/total: 60453.86667919159

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 819
aug_traj/return: -24847.008983770676
aug_traj/length: 4000.0
time/training: 57.56401872634888
training/train_loss_mean: -1.87948030618293


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 834
aug_traj/return: -18827.028832725686
aug_traj/length: 4000.0
time/training: 57.06691241264343
training/train_loss_mean: -1.6416368739623297
training/train_loss_std: 3.894334471070775
training/nll: -4.908287048339844
training/entropy: -3.902094602584839
training/temp_value: 0.7599890896017834
time/total: 61718.82878255844

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 835
aug_traj/return: -26665.02796594894
aug_traj/length: 4000.0
time/training: 57.050628662109375
training/train_loss_mean: -1.7672776529169467
training/train_loss_std: 1.5472123608757202
training/nll: -4.602732181549072
training/entropy: -3.542759656906128
training/temp_value: 0.759404781902763
time/total: 61788.9361102581

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 836
aug_traj/return: -38709.0
aug_traj/length: 4000.0
time/training: 57.236907720565796
training/train_loss_mean: -1.3552437642803887
training/tra


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 851
aug_traj/return: -41257.99946336639
aug_traj/length: 4000.0
time/training: 57.13484287261963
training/train_loss_mean: -1.4071981769844688
training/train_loss_std: 7.412020221853338
training/nll: -3.360140085220337
training/entropy: -2.4865920543670654
training/temp_value: 0.7691940514879838
time/total: 63055.30105137825

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 852
aug_traj/return: -41365.876624487246
aug_traj/length: 4000.0
time/training: 56.7592453956604
training/train_loss_mean: -1.698655928416432
training/train_loss_std: 1.3686119053179016
training/nll: -3.600095510482788
training/entropy: -2.5923352241516113
training/temp_value: 0.7683799116398811
time/total: 63124.97449231148

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 853
aug_traj/return: -32705.42477730319
aug_traj/length: 4000.0
time/training: 57.452709436416626
training/train_loss_mean: -1.808189643319391
tr


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 868
aug_traj/return: -25807.388882699637
aug_traj/length: 4000.0
time/training: 58.88149333000183
training/train_loss_mean: -1.5391301747106982
training/train_loss_std: 3.1273414755050175
training/nll: -4.235141277313232
training/entropy: -3.0830700397491455
training/temp_value: 0.773408910164361
time/total: 64329.27179694176

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 869
aug_traj/return: -33362.38834851213
aug_traj/length: 4000.0
time/training: 57.656933069229126
training/train_loss_mean: -1.7888652356804755
training/train_loss_std: 0.23086274869241857
training/nll: -3.8166580200195312
training/entropy: -2.766650438308716
training/temp_value: 0.7721611497116276
time/total: 64399.86543941498

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 870
aug_traj/return: -29618.01744518287
aug_traj/length: 4000.0
time/training: 58.059406757354736
training/train_loss_mean: -1.61806276619899


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 885
aug_traj/return: -37903.059191895016
aug_traj/length: 4000.0
time/training: 53.20684099197388
training/train_loss_mean: -1.3396450766062677
training/train_loss_std: 3.990625454388763
training/nll: -3.247931480407715
training/entropy: -2.1036486625671387
training/temp_value: 0.7729191319260039
time/total: 65600.47417283058

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 886
aug_traj/return: -12248.927724833975
aug_traj/length: 4000.0
time/training: 53.070664167404175
training/train_loss_mean: -1.7999389624577615
training/train_loss_std: 0.2548314715086727
training/nll: -4.5050177574157715
training/entropy: -3.374084949493408
training/temp_value: 0.7739003739729657
time/total: 65665.68090248108

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 887
aug_traj/return: -31355.025555753782
aug_traj/length: 4000.0
time/training: 53.11575126647949
training/train_loss_mean: -1.59211563211452


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 902
aug_traj/return: -6491.505997528199
aug_traj/length: 4000.0
time/training: 57.912076473236084
training/train_loss_mean: -1.6743188679913301
training/train_loss_std: 2.0582982375988093
training/nll: -4.296019554138184
training/entropy: -3.0468571186065674
training/temp_value: 0.7686221457359377
time/total: 66875.2647178173

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 903
aug_traj/return: -24959.891935867414
aug_traj/length: 4000.0
time/training: 59.045849561691284
training/train_loss_mean: -1.7387342658858265
training/train_loss_std: 0.5224786119042878
training/nll: -3.50321102142334
training/entropy: -2.6656243801116943
training/temp_value: 0.7678856236970673
time/total: 66947.84681034088

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 904
aug_traj/return: -24494.422646758412
aug_traj/length: 4000.0
time/training: 57.445072889328
training/train_loss_mean: -1.4285564677090012



Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 919
aug_traj/return: -9173.760776832349
aug_traj/length: 4000.0
time/training: 57.25754904747009
training/train_loss_mean: -1.7689527538388574
training/train_loss_std: 0.2231355992557172
training/nll: -4.621600151062012
training/entropy: -3.474544048309326
training/temp_value: 0.7669798683198489
time/total: 68171.27570033073

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 920
aug_traj/return: -6158.845826398734
aug_traj/length: 4000.0
time/training: 57.200313329696655
training/train_loss_mean: -1.640966832906749
training/train_loss_std: 2.1189185336384697
training/nll: -3.1088221073150635
training/entropy: -2.1034789085388184
training/temp_value: 0.7659031921477638
evaluation/return_mean_gm: -5555.566420467383
evaluation/return_std_gm: 1551.4107197486528
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 68.94213461875916
time/total: 68310.41820836067

Model saved at ./ex


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 936
aug_traj/return: -20448.591579131917
aug_traj/length: 4000.0
time/training: 57.488057374954224
training/train_loss_mean: -1.7776648406068731
training/train_loss_std: 0.20012115296784508
training/nll: -4.393817901611328
training/entropy: -3.1249585151672363
training/temp_value: 0.7518106681585939
time/total: 69520.58246850967

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 937
aug_traj/return: -12528.901344315182
aug_traj/length: 4000.0
time/training: 57.9417040348053
training/train_loss_mean: -1.767041682125561
training/train_loss_std: 0.20483906857976225
training/nll: -4.013445854187012
training/entropy: -3.111690044403076
training/temp_value: 0.7521642338340049
time/total: 69591.77497673035

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 938
aug_traj/return: -14496.659292132652
aug_traj/length: 4000.0
time/training: 57.912567138671875
training/train_loss_mean: -1.5839613436601


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 953
aug_traj/return: -16297.576068012102
aug_traj/length: 4000.0
time/training: 57.292415142059326
training/train_loss_mean: -1.757835440949018
training/train_loss_std: 0.5842399155283847
training/nll: -4.266885757446289
training/entropy: -3.1855688095092773
training/temp_value: 0.7413788414725159
time/total: 70864.21240401268

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 954
aug_traj/return: -9719.674667209178
aug_traj/length: 4000.0
time/training: 57.17199730873108
training/train_loss_mean: -1.7146145689761576
training/train_loss_std: 1.0065127032453027
training/nll: -4.187918663024902
training/entropy: -2.8144142627716064
training/temp_value: 0.7398565047006156
time/total: 70935.00704050064

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 955
aug_traj/return: -15597.071473852719
aug_traj/length: 4000.0
time/training: 57.16161751747131
training/train_loss_mean: -1.523603658952367


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 970
aug_traj/return: -24152.610740672953
aug_traj/length: 4000.0
time/training: 58.96287679672241
training/train_loss_mean: -1.7837300682036865
training/train_loss_std: 0.20859366275400243
training/nll: -4.12493371963501
training/entropy: -3.129828929901123
training/temp_value: 0.7361518604756684
evaluation/return_mean_gm: -12.0
evaluation/return_std_gm: 0.0
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 70.52573299407959
time/total: 72236.82364583015

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 971
aug_traj/return: -26331.136563817767
aug_traj/length: 4000.0
time/training: 59.12745141983032
training/train_loss_mean: -1.7718412884588792
training/train_loss_std: 0.19868929605575864
training/nll: -4.146674633026123
training/entropy: -3.285532236099243
training/temp_value: 0.7360223220048138
time/total: 72309.1548166275

Model saved at ./exp/2023.03.20/222713-default/


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 987
aug_traj/return: -9837.428681858659
aug_traj/length: 4000.0
time/training: 57.78977870941162
training/train_loss_mean: -1.4591541607511405
training/train_loss_std: 5.489680396497862
training/nll: -3.7474563121795654
training/entropy: -2.7840983867645264
training/temp_value: 0.7283087009632213
time/total: 73529.41424703598

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 988
aug_traj/return: -27984.0
aug_traj/length: 4000.0
time/training: 57.44822692871094
training/train_loss_mean: -1.7751300698820682
training/train_loss_std: 0.20870897700670982
training/nll: -3.992882490158081
training/entropy: -2.978182554244995
training/temp_value: 0.7277675375876343
time/total: 73599.80060505867

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 989
aug_traj/return: -10068.066851371852
aug_traj/length: 4000.0
time/training: 57.28944730758667
training/train_loss_mean: -1.5003338040306105
training/


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1004
aug_traj/return: -47246.32831679421
aug_traj/length: 4000.0
time/training: 53.06424140930176
training/train_loss_mean: -1.7853768076289487
training/train_loss_std: 0.20248854370824435
training/nll: -4.436842441558838
training/entropy: -3.547738552093506
training/temp_value: 0.7209115046147272
time/total: 74798.71250915527

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1005
aug_traj/return: -30491.640270642383
aug_traj/length: 4000.0
time/training: 53.28415322303772
training/train_loss_mean: -1.7739695915114924
training/train_loss_std: 0.19507276486445194
training/nll: -4.314037799835205
training/entropy: -3.4545018672943115
training/temp_value: 0.7217273516117613
time/total: 74863.72858428955

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1006
aug_traj/return: -28789.0
aug_traj/length: 4000.0
time/training: 53.189642906188965
training/train_loss_mean: -1.7762513456707962
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1021
aug_traj/return: -32106.96187137545
aug_traj/length: 4000.0
time/training: 59.19942331314087
training/train_loss_mean: -1.7993071923590818
training/train_loss_std: 0.18734467301936436
training/nll: -4.108376979827881
training/entropy: -3.0131020545959473
training/temp_value: 0.7170946201122776
time/total: 76133.42201447487

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1022
aug_traj/return: -10258.708573752057
aug_traj/length: 4000.0
time/training: 60.07633709907532
training/train_loss_mean: -1.7840281221351162
training/train_loss_std: 0.20022941761225208
training/nll: -4.465757369995117
training/entropy: -3.347996950149536
training/temp_value: 0.7167213629227456
time/total: 76207.96731328964

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1023
aug_traj/return: -6849.518724211777
aug_traj/length: 4000.0
time/training: 59.35073399543762
training/train_loss_mean: -1.759184875244


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1038
aug_traj/return: -35966.14730350618
aug_traj/length: 4000.0
time/training: 57.977721214294434
training/train_loss_mean: -1.8044575566573204
training/train_loss_std: 0.2040602095722018
training/nll: -3.4924890995025635
training/entropy: -2.7726616859436035
training/temp_value: 0.6933763448916677
time/total: 77469.49969482422

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1039
aug_traj/return: -40992.459362374415
aug_traj/length: 4000.0
time/training: 57.79195189476013
training/train_loss_mean: -1.8210150152562066
training/train_loss_std: 0.19183649777996545
training/nll: -4.072571754455566
training/entropy: -3.0957534313201904
training/temp_value: 0.6932466776574537
time/total: 77540.34718847275

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1040
aug_traj/return: -8097.541530121586
aug_traj/length: 4000.0
time/training: 57.99756860733032
training/train_loss_mean: -1.8035048932


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1055
aug_traj/return: -41975.0
aug_traj/length: 4000.0
time/training: 57.77171874046326
training/train_loss_mean: -1.8427610933392728
training/train_loss_std: 0.21738911455956858
training/nll: -4.024746894836426
training/entropy: -3.1547131538391113
training/temp_value: 0.6731971835132129
time/total: 78818.03251242638

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1056
aug_traj/return: -8194.038458943663
aug_traj/length: 4000.0
time/training: 57.22866082191467
training/train_loss_mean: -1.8308905489369742
training/train_loss_std: 0.21022964601520652
training/nll: -3.753101110458374
training/entropy: -2.776724100112915
training/temp_value: 0.6705646997777459
time/total: 78888.04123878479

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1057
aug_traj/return: -8630.57580158005
aug_traj/length: 4000.0
time/training: 57.90087580680847
training/train_loss_mean: -1.8385987933258814
trainin


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1072
aug_traj/return: -41857.14007316012
aug_traj/length: 4000.0
time/training: 55.732014656066895
training/train_loss_mean: -1.8729166175401448
training/train_loss_std: 0.21210253567876078
training/nll: -4.5941925048828125
training/entropy: -3.6641311645507812
training/temp_value: 0.6511138823237879
time/total: 80187.9167330265

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1073
aug_traj/return: -38537.0
aug_traj/length: 4000.0
time/training: 53.405553102493286
training/train_loss_mean: -1.8593538513867118
training/train_loss_std: 0.2169285190335257
training/nll: -3.191753387451172
training/entropy: -2.4331822395324707
training/temp_value: 0.6494464408452334
time/total: 80253.38190484047

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1074
aug_traj/return: -8894.342434663324
aug_traj/length: 4000.0
time/training: 53.37680435180664
training/train_loss_mean: -1.8789598044513818
trai


Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1089
aug_traj/return: -47769.0
aug_traj/length: 4000.0
time/training: 53.37994194030762
training/train_loss_mean: -1.915739634810728
training/train_loss_std: 0.22425103609791908
training/nll: -4.001254558563232
training/entropy: -3.067880868911743
training/temp_value: 0.6311446331802057
time/total: 81363.82887673378

Model saved at ./exp/2023.03.20/222713-default/model.pt
Iteration 1090
aug_traj/return: -11586.490867586925
aug_traj/length: 4000.0
time/training: 53.364553689956665
training/train_loss_mean: -1.9180692115965186
training/train_loss_std: 0.23457959649323273
training/nll: -4.883384704589844
training/entropy: -4.009816646575928
training/temp_value: 0.6309636141465054
evaluation/return_mean_gm: 0.0
evaluation/return_std_gm: 0.0
evaluation/length_mean_gm: 4000.0
evaluation/length_std_gm: 0.0
time/evaluation: 63.81953430175781
time/total: 81493.11022377014

Model saved at ./exp/2023.03.20/222713-default/model.pt


  if not hasattr(tensorboard, "__version__") or LooseVersion(
  from urllib3.contrib.pyopenssl import orig_util_SSLContext as SSLContext
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  (np.object, string),
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  (np.bool, bool),
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.object: SlowAppendObjectArrayToTensorProto,
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  np.bool: SlowAppendBoolArrayToTensorProto,
  logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
Process ForkServerProcess-7:
Traceback (most recent call last):
  File "/home/gabriel/miniconda3/envs/odt/lib/python3.8/multiprocessing/process.py", line 315, in _boo

KeyboardInterrupt: 

In [None]:
def study_env(env):
    """Print the basic dimensions of a gym-style environment.

    Reports the first dimension of the observation space, the first dimension
    of the action space, and the overall action range (global minimum of
    ``low`` and global maximum of ``high``, each pulled 1e-6 inwards).

    Args:
        env: object exposing ``observation_space.shape`` and an
            ``action_space`` with ``shape``, ``low`` and ``high``.
    """
    margin = 1e-6  # shrink the reported bounds slightly on both sides

    n_obs = env.observation_space.shape[0]
    n_act = env.action_space.shape[0]
    lower_bound = float(env.action_space.low.min()) + margin
    upper_bound = float(env.action_space.high.max()) - margin

    summary = (
        ("state_dim: {}", n_obs),
        ("act_dim: {}", n_act),
        ("action_range: {}", [lower_bound, upper_bound]),
    )
    for template, value in summary:
        print(template.format(value))


In [None]:
# Build the two environments that the following cells compare side by side:
# the project environment created from the parsed `args`, and a gym
# environment looked up by its registered id.
my_env = make_pytorch_env(args)
their_env = gym.make('antmaze-large-diverse-v2')

In [None]:
study_env(my_env)

In [None]:
study_env(their_env)

In [None]:
# Smoke test: reset the project environment, then take a single step with action `2`.
# NOTE(review): study_env reads action_space.shape/low/high, which suggests a
# continuous (array-valued) action space — confirm a bare scalar is a valid action here.
my_env.reset()
my_env.step(2)

In [None]:
args

In [None]:
their_env.action_space

In [None]:
# Same smoke test on the gym environment: reset, then one step with action `2`.
# NOTE(review): confirm that a bare scalar is a valid action for this environment's
# action space; an array of the action-space shape may be required.
their_env.reset()
their_env.step(2)

In [None]:
#experiment.variant
#experiment.model.forward

In [None]:
loss

In [None]:
experiment.model.forward

In [None]:
experiment.model

In [None]:
import math
# Probe: 1e-310 is below the smallest normal double (~2.2e-308) but is still
# representable as a subnormal, so math.log returns a finite value (about -713.8)
# instead of raising, as it would for 0.0.
math.log(1e-310)

In [None]:
# Reload the object dumped to action_preds.pt (path relative to the working directory).
# Other cells call .log_likelihood(), .entropy() and .perplexity on it, so it is
# presumably a distribution object produced by the model — TODO confirm.
# NOTE: torch.load unpickles the file; only load dumps you created yourself.
action_preds = torch.load('action_preds.pt')


In [None]:
# Reload the object dumped to a.pt; other cells pass it to action_preds.log_likelihood()
# and index it as a[0][0], so it is presumably the target action batch — TODO confirm.
a = torch.load("a.pt")

In [None]:
action_preds.log_likelihood(a)

In [None]:
# Keep the log-likelihood of `a` under `action_preds` so it can be inspected further
# (another cell passes it through torch.nan_to_num).
# NOTE(review): `sefude` is not a descriptive name; consider renaming it together
# with its uses, e.g. to `log_likelihood`.
sefude = action_preds.log_likelihood(a)

In [None]:
a

In [None]:
a

In [None]:
a

In [None]:
torch.nan_to_num(sefude)

In [None]:
action_preds

In [None]:
a[0][0]

In [None]:
# math.log is only defined for positive reals: a negative argument raises
# ValueError instead of returning NaN. Catch the exception so the cell displays
# it without aborting "Restart & Run All".
try:
    negative_log = math.log(-0.3)
except ValueError as err:
    # `err` is unbound when the except block exits, so keep it under another name.
    negative_log = err
negative_log

In [None]:
action_preds.entropy().mean()

In [None]:
action_preds.log_likelihood(10)

In [None]:
action_preds.perplexity

In [None]:
import torch

# Dimensions of the state-embedding layer under test.
state_dim = 4
hidden_size = 512

# Prefer the GPU, but fall back to CPU so the comparison also runs on machines
# without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Freshly constructed layer, to be compared against the objects dumped to disk.
embed_state = torch.nn.Linear(state_dim, hidden_size).to(device)

# map_location remaps storages that were saved on a GPU, so loading also works
# on a CPU-only machine.
# NOTE: torch.load unpickles the files; only load dumps you created yourself.
# embed_state.pt presumably holds the model's own embedding layer (other cells
# read its .weight and call it) — TODO confirm.
embed_state_2 = torch.load('embed_state.pt', map_location=device).to(device)
states = torch.load('states.pt', map_location=device).to(device)
state_embeddings = embed_state(states)
state_embeddings_2 = torch.load('state_embeddings.pt', map_location=device).to(device)


In [None]:
states[0]

In [None]:
print("state_embeddings {}".format(state_embeddings))


In [None]:
print("state_embeddings 2 {}".format(state_embeddings_2))


In [None]:
embed_state.weight

In [None]:
embed_state_2.weight

In [None]:
embed_state

In [None]:
embed_state_2

In [None]:
embed_state(states)

In [None]:
embed_state_2(states)

In [None]:
# NOTE(review): `stoppppppppppp` is not defined in any visible cell, so this line
# raises NameError — presumably a deliberate tripwire that halts "Run All" before
# the cells below. Confirm, and consider an explicit `raise` with a message instead.
stoppppppppppp

In [None]:
import torch
torch.__version__

In [None]:
!pip list | grep torch

In [None]:
!pip3 install torch --upgrade

In [None]:
# Normalizing the rewards to see whether that solves the problem

In [None]:
import pickle

# Load two pickled datasets (paths are relative to the notebook's working directory):
# the project's drone dataset and the antmaze dataset, presumably kept as a reference
# for comparison.
# NOTE: pickle.load can execute arbitrary code — only open files from a trusted source.
with open('data/drone_dataset.pkl', 'rb') as f:
    my_data = pickle.load(f)
    
with open('data/antmaze-large-diverse-v2.pkl', 'rb') as f:
    their_data = pickle.load(f)

In [None]:
# Print max / min / mean of one field for every entry of the drone dataset.
# NOTE(review): this cell reads the 'actions' arrays although the neighbouring
# cells are about normalizing rewards — set `field` to whichever key is intended.
field = 'actions'
for data in my_data:
    values = data[field]
    print("max: {}".format(np.max(values)))
    print("min: {}".format(np.min(values)))
    print("mean: {}".format(np.mean(values)))
    print('----------------')

In [None]:
np.shape(my_data[0]['observations'])

In [None]:
np.shape(their_data[0]['observations'])

In [None]:
# Min-max rescaling: maps the smallest element of `v` to 0 and the largest to 1.
# NOTE(review): `v` is not defined in any visible cell, and the denominator is zero
# when v.max() == v.min() — confirm the intended input before relying on this.
(v - v.min()) / (v.max() - v.min())