# Non-stationary

In [1]:
from copy import deepcopy
import itertools
import numpy as np
import torch
from torch.optim import Adam
import pybulletgym
import gym
import time
import spinup.algos.pytorch.td3.core as core
from spinup.utils.logx import EpochLogger


class ReplayBuffer:
    """
    A simple FIFO experience replay buffer for TD3 agents.
    """

    def __init__(self, obs_dim, act_dim, size):
        self.obs_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
        self.obs2_buf = np.zeros(core.combined_shape(size, obs_dim), dtype=np.float32)
        self.act_buf = np.zeros(core.combined_shape(size, act_dim), dtype=np.float32)
        self.rew_buf = np.zeros(size, dtype=np.float32)
        self.done_buf = np.zeros(size, dtype=np.float32)
        self.ptr, self.size, self.max_size = 0, 0, size

    def store(self, obs, act, rew, next_obs, done):
        self.obs_buf[self.ptr] = obs
        self.obs2_buf[self.ptr] = next_obs
        self.act_buf[self.ptr] = act
        self.rew_buf[self.ptr] = rew
        self.done_buf[self.ptr] = done
        self.ptr = (self.ptr+1) % self.max_size
        self.size = min(self.size+1, self.max_size)

    def sample_batch(self, batch_size=32):
        idxs = np.random.randint(0, self.size, size=batch_size)
        batch = dict(obs=self.obs_buf[idxs],
                     obs2=self.obs2_buf[idxs],
                     act=self.act_buf[idxs],
                     rew=self.rew_buf[idxs],
                     done=self.done_buf[idxs])
        return {k: torch.as_tensor(v, dtype=torch.float32) for k,v in batch.items()}





In [2]:
class POMDPWrapper(gym.ObservationWrapper):
    def __init__(self, env_name):
        super().__init__(gym.make(env_name))
        
        # Remove velocity info
        # OpenAIGym
        #  1. MuJoCo
        if env_name == "HalfCheetah-v3" or env_name == "HalfCheetah-v2":
            self.remain_obs_idx = np.arange(0, 8)
        elif env_name == "Ant-v3" or env_name == "Ant-v2":
            self.remain_obs_idx = list(np.arange(0, 13)) + list(np.arange(27, 111))
        elif env_name == 'Walker2d-v3' or env_name == "Walker2d-v2":
            self.remain_obs_idx = np.arange(0, 8)
        elif env_name == 'Hopper-v3' or env_name == "Hopper-v2":
            self.remain_obs_idx = np.arange(0, 5)
        elif env_name == "InvertedPendulum-v2":
            self.remain_obs_idx = np.arange(0, 2)
        elif env_name == "InvertedDoublePendulum-v2":
            self.remain_obs_idx = list(np.arange(0, 5)) + list(np.arange(8, 11))
        elif env_name == "Swimmer-v3" or env_name == "Swimmer-v2":
            self.remain_obs_idx = np.arange(0, 3)
        elif env_name == "Thrower-v2":
            self.remain_obs_idx = list(np.arange(0, 7)) + list(np.arange(14, 23))
        elif env_name == "Striker-v2":
            self.remain_obs_idx = list(np.arange(0, 7)) + list(np.arange(14, 23))
        elif env_name == "Pusher-v2":
            self.remain_obs_idx = list(np.arange(0, 7)) + list(np.arange(14, 23))
        elif env_name == "Reacher-v2":
            self.remain_obs_idx = list(np.arange(0, 6)) + list(np.arange(8, 11))
        elif env_name == 'Humanoid-v3' or env_name == "Humanoid-v2":
            self.remain_obs_idx = list(np.arange(0, 22)) + list(np.arange(45, 185)) + list(np.arange(269, 376))
        elif env_name == 'HumanoidStandup-v2':
            self.remain_obs_idx = list(np.arange(0, 22)) + list(np.arange(45, 185)) + list(np.arange(269, 376))
        # PyBulletGym
        #  1. MuJoCo
        elif env_name == 'HalfCheetahMuJoCoEnv-v0':
            self.remain_obs_idx = np.arange(0, 8)
        elif env_name == 'AntMuJoCoEnv-v0':
            self.remain_obs_idx = list(np.arange(0, 13)) + list(np.arange(27, 111))
        elif env_name == 'Walker2DMuJoCoEnv-v0':
            self.remain_obs_idx = np.arange(0, 8)
        elif env_name == 'HopperMuJoCoEnv-v0':
            self.remain_obs_idx = np.arange(0, 7)
        elif env_name == 'InvertedPendulumMuJoCoEnv-v0':
            self.remain_obs_idx = np.arange(0, 3)
        elif env_name == 'InvertedDoublePendulumMuJoCoEnv-v0':
            self.remain_obs_idx = list(np.arange(0, 5)) + list(np.arange(8, 11))
        #  2. Roboschool
        elif env_name == 'HalfCheetahPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,26)) - set(np.arange(3,6)))
        elif env_name ==  'AntPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,28)) - set(np.arange(3,6)))
        elif env_name == 'Walker2DPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,22)) - set(np.arange(3,6)))
        elif env_name == 'HopperPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,15)) - set(np.arange(3,6)))
        elif env_name == 'InvertedPendulumPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,5)) - set([1,4]))
        elif env_name == 'InvertedDoublePendulumPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,9)) - set([1,5,8]))
        elif env_name == 'ReacherPyBulletEnv-v0':
            self.remain_obs_idx = list(set(np.arange(0,9)) - set([6,8]))
        else:
            raise ValueError('POMDP for {} is not defined!'.format(env_name))
            
        # Redefine observation_space
        obs_low = np.array([-np.inf for i in range(len(self.remain_obs_idx))], dtype="float32")
        obs_high = np.array([np.inf for i in range(len(self.remain_obs_idx))], dtype="float32")
        self.observation_space = gym.spaces.Box(obs_low, obs_high)
        
    def observation(self, obs):
        return obs.flatten()[self.remain_obs_idx]
    

In [3]:
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence

In [4]:
class MLPCritic(nn.Module):
    def __init__(self, obs_dim, act_dim, hidden_sizes=(128, 128)):
        super(MLPCritic, self).__init__()
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        
        self.layers = nn.ModuleList()
        layer_size = [obs_dim+act_dim]+list(hidden_sizes) + [1]
        for h in range(len(layer_size)-2):
            self.layers += [nn.Linear(layer_size[h], layer_size[h+1]), nn.ReLU()]
        self.layers += [nn.Linear(layer_size[-2], layer_size[-1]), nn.Identity()]
    
    def forward(self, obs, act):
        cat_input = torch.cat([obs, act], dim=-1)
        x = cat_input
        for layer in self.layers:
            x = layer(x)
        return torch.squeeze(x, -1) # Critical to ensure q has right shape.

class MLPActor(nn.Module):
    def __init__(self, obs_dim, act_dim, act_limit, hidden_sizes=(128, 128)):
        super(MLPActor, self).__init__()
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        self.act_limit = act_limit
        
        self.layers = nn.ModuleList()
        layer_size = [obs_dim]+list(hidden_sizes) + [act_dim]
        for h in range(len(layer_size)-2):
            self.layers += [nn.Linear(layer_size[h], layer_size[h+1]), nn.ReLU()]
        self.layers += [nn.Linear(layer_size[-2], layer_size[-1]), nn.Tanh()]
    
    def forward(self, obs):
        x = obs
        for layer in self.layers:
            x = layer(x)
        return self.act_limit * x

class MLPActorCritic(nn.Module):
    def __init__(self, obs_dim, act_dim, act_limit, hidden_sizes=(128, 128)):
        super(MLPActorCritic, self).__init__()
        self.q1 = MLPCritic(obs_dim, act_dim)
        self.q2 = MLPCritic(obs_dim, act_dim)
        self.pi = MLPActor(obs_dim, act_dim, act_limit=1)
    
    def act(self, obs):
        with torch.no_grad():
            return self.pi(obs).numpy() 
        

In [5]:
cuda = torch.device('cuda')

In [8]:
def td3(env_name, actor_critic=core.MLPActorCritic, ac_kwargs=dict(), seed=0, 
        steps_per_epoch=4000, epochs=100, replay_size=int(1e6), gamma=0.99, 
        polyak=0.995, pi_lr=1e-3, q_lr=1e-3, batch_size=100, start_steps=10000, 
        update_after=1000, update_every=50, act_noise=0.1, target_noise=0.2, 
        noise_clip=0.5, policy_delay=2, num_test_episodes=5, max_ep_len=1000, 
        nonstationary_env = True,
        gravity_change_pattern = 'gravity_averagely_equal',
        partially_observable = False,
        logger_kwargs=dict(), save_freq=1):
    """
    Twin Delayed Deep Deterministic Policy Gradient (TD3)


    Args:
        env_fn : A function which creates a copy of the environment.
            The environment must satisfy the OpenAI Gym API.

        actor_critic: The constructor method for a PyTorch Module with an ``act`` 
            method, a ``pi`` module, a ``q1`` module, and a ``q2`` module.
            The ``act`` method and ``pi`` module should accept batches of 
            observations as inputs, and ``q1`` and ``q2`` should accept a batch 
            of observations and a batch of actions as inputs. When called, 
            these should return:

            ===========  ================  ======================================
            Call         Output Shape      Description
            ===========  ================  ======================================
            ``act``      (batch, act_dim)  | Numpy array of actions for each 
                                           | observation.
            ``pi``       (batch, act_dim)  | Tensor containing actions from policy
                                           | given observations.
            ``q1``       (batch,)          | Tensor containing one current estimate
                                           | of Q* for the provided observations
                                           | and actions. (Critical: make sure to
                                           | flatten this!)
            ``q2``       (batch,)          | Tensor containing the other current 
                                           | estimate of Q* for the provided observations
                                           | and actions. (Critical: make sure to
                                           | flatten this!)
            ===========  ================  ======================================

        ac_kwargs (dict): Any kwargs appropriate for the ActorCritic object 
            you provided to TD3.

        seed (int): Seed for random number generators.

        steps_per_epoch (int): Number of steps of interaction (state-action pairs) 
            for the agent and the environment in each epoch.

        epochs (int): Number of epochs to run and train agent.

        replay_size (int): Maximum length of replay buffer.

        gamma (float): Discount factor. (Always between 0 and 1.)

        polyak (float): Interpolation factor in polyak averaging for target 
            networks. Target networks are updated towards main networks 
            according to:

            .. math:: \\theta_{\\text{targ}} \\leftarrow 
                \\rho \\theta_{\\text{targ}} + (1-\\rho) \\theta

            where :math:`\\rho` is polyak. (Always between 0 and 1, usually 
            close to 1.)

        pi_lr (float): Learning rate for policy.

        q_lr (float): Learning rate for Q-networks.

        batch_size (int): Minibatch size for SGD.

        start_steps (int): Number of steps for uniform-random action selection,
            before running real policy. Helps exploration.

        update_after (int): Number of env interactions to collect before
            starting to do gradient descent updates. Ensures replay buffer
            is full enough for useful updates.

        update_every (int): Number of env interactions that should elapse
            between gradient descent updates. Note: Regardless of how long 
            you wait between updates, the ratio of env steps to gradient steps 
            is locked to 1.

        act_noise (float): Stddev for Gaussian exploration noise added to 
            policy at training time. (At test time, no noise is added.)

        target_noise (float): Stddev for smoothing noise added to target 
            policy.

        noise_clip (float): Limit for absolute value of target policy 
            smoothing noise.

        policy_delay (int): Policy will only be updated once every 
            policy_delay times for each update of the Q-networks.

        num_test_episodes (int): Number of episodes to test the deterministic
            policy at the end of each epoch.

        max_ep_len (int): Maximum length of trajectory / episode / rollout.

        logger_kwargs (dict): Keyword args for EpochLogger.

        save_freq (int): How often (in terms of gap between epochs) to save
            the current policy and value function.

    """

    logger = EpochLogger(**logger_kwargs)
    logger.save_config(locals())

    torch.manual_seed(seed)
    np.random.seed(seed)

    # Wrapper environment if using POMDP
    if partially_observable == True:
        env, test_env = POMDPWrapper(env_name), POMDPWrapper(env_name)
    else:
        env, test_env = gym.make(env_name), gym.make(env_name)
    obs_dim = env.observation_space.shape[0]   
    act_dim = env.action_space.shape[0]

    # Action limit for clamping: critically, assumes all dimensions share the same bound!
    act_limit = env.action_space.high[0]

    # Create actor-critic module and target networks
    mlp_c1 = MLPCritic(obs_dim, act_dim)
    mlp_c2 = MLPCritic(obs_dim, act_dim)
    mlp_a = MLPActor(obs_dim, act_dim, act_limit)
    
    mlp_c1_targ = deepcopy(mlp_c1)
    mlp_c2_targ = deepcopy(mlp_c2)
    mlp_a_targ = deepcopy(mlp_a)
    mlp_c1.cuda()
    mlp_c2.cuda()
    mlp_a.cuda()
    mlp_c1_targ.cuda()
    mlp_c2_targ.cuda()
    mlp_a_targ.cuda()

    # Freeze target networks with respect to optimizers (only update via polyak averaging)
    for p in mlp_c1_targ.parameters():
        p.requires_grad = False
    for p in mlp_c2_targ.parameters():
        p.requires_grad = False
    for p in mlp_a_targ.parameters():
        p.requires_grad = False
        
    # List of parameters for both Q-networks (save this for convenience)
    q_params = itertools.chain(mlp_c1.parameters(), mlp_c2.parameters())

    # Experience buffer
    replay_buffer = ReplayBuffer(obs_dim=obs_dim, act_dim=act_dim, size=replay_size)

#     # Count variables (protip: try to get a feel for how different size networks behave!)
#     var_counts = tuple(core.count_vars(module) for module in [ac.pi, ac.q1, ac.q2])
#     logger.log('\nNumber of parameters: \t pi: %d, \t q1: %d, \t q2: %d\n'%var_counts)

    # Set up function for computing TD3 Q-losses
    def compute_loss_q(data):
        o, a, r, o2, d = data['obs'].to(device=cuda), data['act'].to(device=cuda), data['rew'].to(device=cuda), data['obs2'].to(device=cuda), data['done'].to(device=cuda)
        
        q1 = mlp_c1(o, a)
        q2 = mlp_c2(o, a)

        # Bellman backup for Q functions
        with torch.no_grad():
            pi_targ = mlp_a_targ(o2)
            a2 = pi_targ

            # Target Q-values
            q1_pi_targ = mlp_c1_targ(o2, a2)
            q2_pi_targ = mlp_c2_targ(o2, a2)
            q_pi_targ = torch.min(q1_pi_targ, q2_pi_targ)
            backup = r + gamma * (1 - d) * q_pi_targ

        # MSE loss against Bellman backup
        loss_q1 = ((q1 - backup)**2).mean()
        loss_q2 = ((q2 - backup)**2).mean()
        loss_q = loss_q1 + loss_q2

        # Useful info for logging
        loss_info = dict(Q1Vals=q1.detach().cpu().numpy(),
                         Q2Vals=q2.detach().cpu().numpy())

        return loss_q, loss_info

    # Set up function for computing TD3 pi loss
    def compute_loss_pi(data):
        o = data['obs'].to(device=cuda)
        q1_pi = mlp_c1(o, mlp_a(o))
        return -q1_pi.mean()

    # Set up optimizers for policy and q-function
    pi_optimizer = Adam(mlp_a.parameters(), lr=pi_lr)
    q_optimizer = Adam(q_params, lr=q_lr)

#     # Set up model saving
#     logger.setup_pytorch_saver(ac)

    def update(data, timer):
        # First run one gradient descent step for Q1 and Q2
        q_optimizer.zero_grad()
        loss_q, loss_info = compute_loss_q(data)
        loss_q.backward()
        q_optimizer.step()

        # Record things
        logger.store(LossQ=loss_q.item(), **loss_info)

        # Freeze Q-networks so you don't waste computational effort 
        # computing gradients for them during the policy learning step.
        for p in q_params:
            p.requires_grad = False

        # Next run one gradient descent step for pi.
        pi_optimizer.zero_grad()
        loss_pi = compute_loss_pi(data)
        loss_pi.backward()
        pi_optimizer.step()

        # Unfreeze Q-networks so you can optimize it at next DDPG step.
        for p in q_params:
            p.requires_grad = True

        # Record things
        logger.store(LossPi=loss_pi.item())

        # Finally, update target networks by polyak averaging.
        with torch.no_grad():
            for p, p_targ in zip(mlp_a.parameters(), mlp_a_targ.parameters()):
                p_targ.data.mul_(polyak)
                p_targ.data.add_((1 - polyak) * p.data)
            for p, p_targ in zip(mlp_c1.parameters(), mlp_c1_targ.parameters()):
                p_targ.data.mul_(polyak)
                p_targ.data.add_((1 - polyak) * p.data)
            for p, p_targ in zip(mlp_c2.parameters(), mlp_c2_targ.parameters()):
                p_targ.data.mul_(polyak)
                p_targ.data.add_((1 - polyak) * p.data)

    def get_action(o, noise_scale):
        o = torch.tensor(o).view(1, -1).float().to(device=cuda)
        with torch.no_grad():
            a = mlp_a(o)
        a = a.cpu().numpy().flatten()
        a += noise_scale * np.random.randn(act_dim)
        return np.clip(a, -act_limit, act_limit)

    def test_agent():
        for j in range(num_test_episodes):
            o, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
            
            while not(d or (ep_len == max_ep_len)):
                # Take deterministic actions at test time (noise_scale=0)
                o, r, d, _ = test_env.step(get_action(o, 0))
                ep_ret += r
                ep_len += 1
            logger.store(TestEpRet=ep_ret, TestEpLen=ep_len)

    # Prepare for interaction with environment
    total_steps = steps_per_epoch * epochs
    start_time = time.time()
    o, ep_ret, ep_len = env.reset(), 0, 0

    # Main loop: collect experience in env and update/log each epoch
    for t in range(total_steps):
        
        # Until start_steps have elapsed, randomly sample actions
        # from a uniform distribution for better exploration. Afterwards, 
        # use the learned policy (with some noise, via act_noise). 
        if t > start_steps:
            a = get_action(o, act_noise)
        else:
            a = env.action_space.sample()
        
        if nonstationary_env == True:
            gravity_cycle = 1000
            gravity_base = -9.81
            if gravity_change_pattern == 'gravity_averagely_equal':
                gravity = gravity_base * 1 / 2 * (np.cos(2 * np.pi / gravity_cycle * t) + 1) + gravity_base / 2
            elif gravity_change_pattern == 'gravity_averagely_easier':
                gravity = gravity_base * 1 / 2 * (np.cos(2 * np.pi / gravity_cycle * t) + 1)
            elif gravity_change_pattern == 'gravity_averagely_harder':
                gravity = gravity_base * 1 / 2 * (-np.cos(2 * np.pi / gravity_cycle * t) + 1) + gravity_base
            else:
                pass

            if 'PyBulletEnv' in env_name:
                env.env._p.setGravity(0, 0, gravity)
            elif 'Roboschool' in env_name:
                pass
            else:
                env.model.opt.gravity[2] = gravity
        
        # Step the env
        o2, r, d, _ = env.step(a)
            
        ep_ret += r
        ep_len += 1

        # Ignore the "done" signal if it comes from hitting the time
        # horizon (that is, when it's an artificial terminal signal
        # that isn't based on the agent's state)
        d = False if ep_len==max_ep_len else d

        # Store experience to replay buffer
        replay_buffer.store(o, a, r, o2, d)

        # Super critical, easy to overlook step: make sure to update 
        # most recent observation!
        o = o2

        # End of trajectory handling
        if d or (ep_len == max_ep_len):
            logger.store(EpRet=ep_ret, EpLen=ep_len)
            o, ep_ret, ep_len = env.reset(), 0, 0

        # Update handling
        if t >= update_after and t % update_every == 0:
            for j in range(update_every):
                batch = replay_buffer.sample_batch(batch_size)
                update(data=batch, timer=j)

        # End of epoch handling
        if (t+1) % steps_per_epoch == 0:
            epoch = (t+1) // steps_per_epoch

#             # Save model
#             if (epoch % save_freq == 0) or (epoch == epochs):
#                 logger.save_state({'env': env}, None)

            # Test the performance of the deterministic version of the agent.
            test_agent()

            # Log info about epoch
            logger.log_tabular('Epoch', epoch)
            logger.log_tabular('EpRet', with_min_and_max=True)
            logger.log_tabular('TestEpRet', with_min_and_max=True)
            logger.log_tabular('EpLen', average_only=True)
            logger.log_tabular('TestEpLen', average_only=True)
            logger.log_tabular('TotalEnvInteracts', t)
            logger.log_tabular('Q1Vals', with_min_and_max=True)
            logger.log_tabular('Q2Vals', with_min_and_max=True)
            logger.log_tabular('LossPi', average_only=True)
            logger.log_tabular('LossQ', average_only=True)
            logger.log_tabular('Time', time.time()-start_time)
            logger.dump_tabular()



In [9]:
args = {'env': 'Ant-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"Ant-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x000001EFDC0CF2C8>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\ling

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             207 |
|          StdEpRet |             202 |
|          MaxEpRet |             429 |
|          MinEpRet |           -53.7 |
|  AverageTestEpRet |             517 |
|      StdTestEpRet |             103 |
|      MaxTestEpRet |             586 |
|      MinTestEpRet |             314 |
|             EpLen |             848 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            38.7 |
|         StdQ1Vals |              12 |
|         MaxQ1Vals |            67.6 |
|         MinQ1Vals |           -51.9 |
|     AverageQ2Vals |            38.7 |
|         StdQ2Vals |              12 |
|         MaxQ2Vals |            67.6 |
|         MinQ2Vals |           -50.4 |
|            LossPi |           -40.4 |
|             LossQ |            3.22 |
|              Time |             843 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |             549 |
|          StdEpRet |             267 |
|          MaxEpRet |             800 |
|          MinEpRet |            99.6 |
|  AverageTestEpRet |             805 |
|      StdTestEpRet |            48.8 |
|      MaxTestEpRet |             856 |
|      MinTestEpRet |             722 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            58.4 |
|         StdQ1Vals |            22.3 |
|         MaxQ1Vals |            86.6 |
|         MinQ1Vals |           -35.7 |
|     AverageQ2Vals |            58.4 |
|         StdQ2Vals |            22.3 |
|         MaxQ2Vals |            85.9 |
|         MinQ2Vals |             -56 |
|            LossPi |           -60.3 |
|             LossQ |            4.68 |
|              Time |        1.96e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             233 |
|          StdEpRet |             160 |
|          MaxEpRet |             491 |
|          MinEpRet |              51 |
|  AverageTestEpRet |             334 |
|      StdTestEpRet |              37 |
|      MaxTestEpRet |             399 |
|      MinTestEpRet |             284 |
|             EpLen |             449 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            68.9 |
|         StdQ1Vals |            30.6 |
|         MaxQ1Vals |             100 |
|         MinQ1Vals |             -67 |
|     AverageQ2Vals |            68.9 |
|         StdQ2Vals |            30.6 |
|         MaxQ2Vals |            99.3 |
|         MinQ2Vals |           -81.9 |
|            LossPi |           -70.6 |
|             LossQ |            6.85 |
|              Time |        3.07e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |            40.9 |
|          StdEpRet |            89.5 |
|          MaxEpRet |             149 |
|          MinEpRet |            -122 |
|  AverageTestEpRet |           -28.2 |
|      StdTestEpRet |             389 |
|      MaxTestEpRet |             309 |
|      MinTestEpRet |            -769 |
|             EpLen |             411 |
|         TestEpLen |             621 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |            64.3 |
|         StdQ1Vals |            35.2 |
|         MaxQ1Vals |             174 |
|         MinQ1Vals |            -103 |
|     AverageQ2Vals |            64.3 |
|         StdQ2Vals |            35.3 |
|         MaxQ2Vals |             177 |
|         MinQ2Vals |            -111 |
|            LossPi |             -67 |
|             LossQ |            12.6 |
|              Time |        4.17e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |             198 |
|          StdEpRet |             173 |
|          MaxEpRet |             475 |
|          MinEpRet |            29.9 |
|  AverageTestEpRet |             145 |
|      StdTestEpRet |             144 |
|      MaxTestEpRet |             421 |
|      MinTestEpRet |            30.3 |
|             EpLen |             508 |
|         TestEpLen |             536 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |            49.2 |
|         StdQ1Vals |            38.2 |
|         MaxQ1Vals |            88.8 |
|         MinQ1Vals |            -112 |
|     AverageQ2Vals |            49.2 |
|         StdQ2Vals |            38.2 |
|         MaxQ2Vals |              88 |
|         MinQ2Vals |            -108 |
|            LossPi |           -50.9 |
|             LossQ |            10.2 |
|              Time |        5.28e+03 |
---------------------------------------


In [7]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_True'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_True\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_True_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetah-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_True",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x000001EFDC067188>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_True",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
  



---------------------------------------
|             Epoch |               1 |
|      AverageEpRet |            -336 |
|          StdEpRet |            75.1 |
|          MaxEpRet |            -243 |
|          MinEpRet |            -445 |
|  AverageTestEpRet |            -318 |
|      StdTestEpRet |            3.27 |
|      MaxTestEpRet |            -314 |
|      MinTestEpRet |            -323 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           4e+03 |
|     AverageQ1Vals |            1.97 |
|         StdQ1Vals |            2.41 |
|         MaxQ1Vals |            10.8 |
|         MinQ1Vals |           -2.22 |
|     AverageQ2Vals |            1.97 |
|         StdQ2Vals |            2.41 |
|         MaxQ2Vals |            10.6 |
|         MinQ2Vals |           -2.14 |
|            LossPi |           -3.06 |
|             LossQ |            0.27 |
|              Time |            78.8 |
---------------------------------------


---------------------------------------
|             Epoch |              10 |
|      AverageEpRet |             289 |
|          StdEpRet |             432 |
|          MaxEpRet |             665 |
|          MinEpRet |            -445 |
|  AverageTestEpRet |             199 |
|      StdTestEpRet |             398 |
|      MaxTestEpRet |             551 |
|      MinTestEpRet |            -295 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           4e+04 |
|     AverageQ1Vals |            34.9 |
|         StdQ1Vals |            5.89 |
|         MaxQ1Vals |            47.4 |
|         MinQ1Vals |            21.6 |
|     AverageQ2Vals |            34.9 |
|         StdQ2Vals |            5.89 |
|         MaxQ2Vals |            47.6 |
|         MinQ2Vals |            21.7 |
|            LossPi |           -35.8 |
|             LossQ |           0.414 |
|              Time |        1.18e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              19 |
|      AverageEpRet |             955 |
|          StdEpRet |             212 |
|          MaxEpRet |        1.25e+03 |
|          MinEpRet |             731 |
|  AverageTestEpRet |             742 |
|      StdTestEpRet |             229 |
|      MaxTestEpRet |        1.18e+03 |
|      MinTestEpRet |             560 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         7.6e+04 |
|     AverageQ1Vals |            62.8 |
|         StdQ1Vals |            17.7 |
|         MaxQ1Vals |            97.3 |
|         MinQ1Vals |            43.4 |
|     AverageQ2Vals |            62.8 |
|         StdQ2Vals |            17.7 |
|         MaxQ2Vals |            97.3 |
|         MinQ2Vals |            43.6 |
|            LossPi |           -63.6 |
|             LossQ |             1.1 |
|              Time |         2.3e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              28 |
|      AverageEpRet |             958 |
|          StdEpRet |             274 |
|          MaxEpRet |        1.28e+03 |
|          MinEpRet |             684 |
|  AverageTestEpRet |        1.42e+03 |
|      StdTestEpRet |            52.4 |
|      MaxTestEpRet |        1.48e+03 |
|      MinTestEpRet |        1.34e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.12e+05 |
|     AverageQ1Vals |            82.7 |
|         StdQ1Vals |            35.5 |
|         MaxQ1Vals |             128 |
|         MinQ1Vals |           -9.36 |
|     AverageQ2Vals |            82.7 |
|         StdQ2Vals |            35.5 |
|         MaxQ2Vals |             129 |
|         MinQ2Vals |           -8.96 |
|            LossPi |           -83.3 |
|             LossQ |            2.06 |
|              Time |        3.46e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              37 |
|      AverageEpRet |        1.46e+03 |
|          StdEpRet |             128 |
|          MaxEpRet |        1.57e+03 |
|          MinEpRet |        1.24e+03 |
|  AverageTestEpRet |             997 |
|      StdTestEpRet |             350 |
|      MaxTestEpRet |        1.48e+03 |
|      MinTestEpRet |             404 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.48e+05 |
|     AverageQ1Vals |            98.1 |
|         StdQ1Vals |            41.5 |
|         MaxQ1Vals |             144 |
|         MinQ1Vals |            41.6 |
|     AverageQ2Vals |            98.1 |
|         StdQ2Vals |            41.5 |
|         MaxQ2Vals |             144 |
|         MinQ2Vals |            42.9 |
|            LossPi |           -98.8 |
|             LossQ |            3.61 |
|              Time |        4.65e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              46 |
|      AverageEpRet |        1.03e+03 |
|          StdEpRet |             371 |
|          MaxEpRet |        1.45e+03 |
|          MinEpRet |             480 |
|  AverageTestEpRet |        1.33e+03 |
|      StdTestEpRet |             504 |
|      MaxTestEpRet |        1.92e+03 |
|      MinTestEpRet |             694 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.84e+05 |
|     AverageQ1Vals |             110 |
|         StdQ1Vals |              49 |
|         MaxQ1Vals |             159 |
|         MinQ1Vals |            27.2 |
|     AverageQ2Vals |             110 |
|         StdQ2Vals |              49 |
|         MaxQ2Vals |             159 |
|         MinQ2Vals |            27.3 |
|            LossPi |            -111 |
|             LossQ |            4.78 |
|              Time |        5.83e+03 |
---------------------------------------


In [None]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': False,
        'exp_name': 'td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetah-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D205A6508>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[]

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |        1.49e+03 |
|          StdEpRet |        1.02e+03 |
|          MaxEpRet |        2.14e+03 |
|          MinEpRet |            -269 |
|  AverageTestEpRet |         1.7e+03 |
|      StdTestEpRet |             441 |
|      MaxTestEpRet |        2.31e+03 |
|      MinTestEpRet |        1.07e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            69.5 |
|         StdQ1Vals |            31.3 |
|         MaxQ1Vals |             143 |
|         MinQ1Vals |            3.29 |
|     AverageQ2Vals |            69.5 |
|         StdQ2Vals |            31.3 |
|         MaxQ2Vals |             141 |
|         MinQ2Vals |            4.14 |
|            LossPi |           -71.2 |
|             LossQ |            7.78 |
|              Time |             835 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |        4.58e+03 |
|          StdEpRet |             176 |
|          MaxEpRet |        4.76e+03 |
|          MinEpRet |        4.33e+03 |
|  AverageTestEpRet |        4.38e+03 |
|      StdTestEpRet |             784 |
|      MaxTestEpRet |        4.84e+03 |
|      MinTestEpRet |        2.82e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |             209 |
|         StdQ1Vals |             134 |
|         MaxQ1Vals |             433 |
|         MinQ1Vals |           -63.4 |
|     AverageQ2Vals |             209 |
|         StdQ2Vals |             134 |
|         MaxQ2Vals |             433 |
|         MinQ2Vals |             -55 |
|            LossPi |            -212 |
|             LossQ |              57 |
|              Time |        1.99e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |        5.55e+03 |
|          StdEpRet |             238 |
|          MaxEpRet |        5.81e+03 |
|          MinEpRet |        5.31e+03 |
|  AverageTestEpRet |        4.18e+03 |
|      StdTestEpRet |        1.51e+03 |
|      MaxTestEpRet |        5.64e+03 |
|      MinTestEpRet |        1.74e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |             361 |
|         StdQ1Vals |             182 |
|         MaxQ1Vals |             584 |
|         MinQ1Vals |           -79.3 |
|     AverageQ2Vals |             361 |
|         StdQ2Vals |             182 |
|         MaxQ2Vals |             585 |
|         MinQ2Vals |           -88.1 |
|            LossPi |            -364 |
|             LossQ |            84.5 |
|              Time |        3.22e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |         6.1e+03 |
|          StdEpRet |             968 |
|          MaxEpRet |        6.78e+03 |
|          MinEpRet |        4.44e+03 |
|  AverageTestEpRet |        6.73e+03 |
|      StdTestEpRet |             164 |
|      MaxTestEpRet |         6.9e+03 |
|      MinTestEpRet |         6.5e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             459 |
|         StdQ1Vals |             204 |
|         MaxQ1Vals |             686 |
|         MinQ1Vals |            -137 |
|     AverageQ2Vals |             459 |
|         StdQ2Vals |             204 |
|         MaxQ2Vals |             684 |
|         MinQ2Vals |            -130 |
|            LossPi |            -462 |
|             LossQ |             102 |
|              Time |        4.53e+03 |
---------------------------------------


In [40]:
args = {'env': 'HopperPyBulletEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': False,
        'exp_name': 'td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_False'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_False\td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_False_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HopperPyBulletEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_False",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D1E2587C8>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_False",
            "first_row":	true,
            "log_current_row":	{},

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             792 |
|          StdEpRet |            34.6 |
|          MaxEpRet |             835 |
|          MinEpRet |             744 |
|  AverageTestEpRet |             901 |
|      StdTestEpRet |            2.95 |
|      MaxTestEpRet |             904 |
|      MinTestEpRet |             896 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            68.2 |
|         StdQ1Vals |            24.9 |
|         MaxQ1Vals |            97.2 |
|         MinQ1Vals |           -24.9 |
|     AverageQ2Vals |            68.2 |
|         StdQ2Vals |            24.9 |
|         MaxQ2Vals |            97.7 |
|         MinQ2Vals |             -21 |
|            LossPi |           -71.7 |
|             LossQ |            6.54 |
|              Time |             705 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |             281 |
|          StdEpRet |             149 |
|          MaxEpRet |             658 |
|          MinEpRet |             147 |
|  AverageTestEpRet |             613 |
|      StdTestEpRet |             370 |
|      MaxTestEpRet |             915 |
|      MinTestEpRet |             138 |
|             EpLen |             219 |
|         TestEpLen |             644 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            84.2 |
|         StdQ1Vals |            25.3 |
|         MaxQ1Vals |             116 |
|         MinQ1Vals |           -28.7 |
|     AverageQ2Vals |            84.2 |
|         StdQ2Vals |            25.3 |
|         MaxQ2Vals |             116 |
|         MinQ2Vals |           -24.2 |
|            LossPi |           -86.3 |
|             LossQ |            5.66 |
|              Time |        1.69e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             516 |
|          StdEpRet |             342 |
|          MaxEpRet |        1.02e+03 |
|          MinEpRet |            84.2 |
|  AverageTestEpRet |             456 |
|      StdTestEpRet |             298 |
|      MaxTestEpRet |             928 |
|      MinTestEpRet |             151 |
|             EpLen |             546 |
|         TestEpLen |             433 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            91.7 |
|         StdQ1Vals |            22.7 |
|         MaxQ1Vals |             132 |
|         MinQ1Vals |           -46.7 |
|     AverageQ2Vals |            91.7 |
|         StdQ2Vals |            22.7 |
|         MaxQ2Vals |             133 |
|         MinQ2Vals |           -49.4 |
|            LossPi |           -93.4 |
|             LossQ |            9.02 |
|              Time |        2.69e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |             778 |
|          StdEpRet |             571 |
|          MaxEpRet |         1.7e+03 |
|          MinEpRet |             216 |
|  AverageTestEpRet |             172 |
|      StdTestEpRet |            77.6 |
|      MaxTestEpRet |             252 |
|      MinTestEpRet |            76.1 |
|             EpLen |             474 |
|         TestEpLen |             160 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             110 |
|         StdQ1Vals |            26.8 |
|         MaxQ1Vals |             168 |
|         MinQ1Vals |           -48.1 |
|     AverageQ2Vals |             110 |
|         StdQ2Vals |            26.8 |
|         MaxQ2Vals |             170 |
|         MinQ2Vals |           -52.3 |
|            LossPi |            -112 |
|             LossQ |            12.7 |
|              Time |        3.66e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        1.63e+03 |
|          StdEpRet |             494 |
|          MaxEpRet |        1.95e+03 |
|          MinEpRet |             776 |
|  AverageTestEpRet |        1.99e+03 |
|      StdTestEpRet |            14.2 |
|      MaxTestEpRet |        2.01e+03 |
|      MinTestEpRet |        1.97e+03 |
|             EpLen |             848 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             132 |
|         StdQ1Vals |            32.3 |
|         MaxQ1Vals |             192 |
|         MinQ1Vals |           -48.7 |
|     AverageQ2Vals |             132 |
|         StdQ2Vals |            32.3 |
|         MaxQ2Vals |             188 |
|         MinQ2Vals |           -58.8 |
|            LossPi |            -134 |
|             LossQ |            14.4 |
|              Time |        4.66e+03 |
---------------------------------------


In [39]:
args = {'env': 'HopperPyBulletEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_True'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_True\td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_True_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HopperPyBulletEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_True",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D1E16B808>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_HopperPyBulletEnv_NoTargSmooth_NoDelayUpdate_POMDP_True",
            "first_row":	true,
            "log_current_row":	{},
   

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             290 |
|          StdEpRet |             297 |
|          MaxEpRet |             943 |
|          MinEpRet |             117 |
|  AverageTestEpRet |             326 |
|      StdTestEpRet |             330 |
|      MaxTestEpRet |             986 |
|      MinTestEpRet |             152 |
|             EpLen |             266 |
|         TestEpLen |             280 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            63.5 |
|         StdQ1Vals |            22.5 |
|         MaxQ1Vals |            90.4 |
|         MinQ1Vals |           -16.3 |
|     AverageQ2Vals |            63.5 |
|         StdQ2Vals |            22.5 |
|         MaxQ2Vals |            91.3 |
|         MinQ2Vals |           -19.4 |
|            LossPi |           -66.6 |
|             LossQ |            5.59 |
|              Time |             694 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |             360 |
|          StdEpRet |             308 |
|          MaxEpRet |             997 |
|          MinEpRet |            95.3 |
|  AverageTestEpRet |        1.05e+03 |
|      StdTestEpRet |             275 |
|      MaxTestEpRet |        1.52e+03 |
|      MinTestEpRet |             763 |
|             EpLen |             303 |
|         TestEpLen |             787 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |              83 |
|         StdQ1Vals |            24.5 |
|         MaxQ1Vals |             111 |
|         MinQ1Vals |           -28.2 |
|     AverageQ2Vals |              83 |
|         StdQ2Vals |            24.5 |
|         MaxQ2Vals |             112 |
|         MinQ2Vals |           -29.5 |
|            LossPi |             -85 |
|             LossQ |            7.37 |
|              Time |         1.7e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             722 |
|          StdEpRet |             426 |
|          MaxEpRet |         1.3e+03 |
|          MinEpRet |             143 |
|  AverageTestEpRet |        1.01e+03 |
|      StdTestEpRet |              61 |
|      MaxTestEpRet |        1.13e+03 |
|      MinTestEpRet |             971 |
|             EpLen |             542 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            89.6 |
|         StdQ1Vals |            26.7 |
|         MaxQ1Vals |             122 |
|         MinQ1Vals |           -46.8 |
|     AverageQ2Vals |            89.6 |
|         StdQ2Vals |            26.8 |
|         MaxQ2Vals |             118 |
|         MinQ2Vals |           -52.5 |
|            LossPi |           -91.4 |
|             LossQ |            11.4 |
|              Time |        2.76e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |             673 |
|          StdEpRet |             428 |
|          MaxEpRet |        1.33e+03 |
|          MinEpRet |             146 |
|  AverageTestEpRet |             766 |
|      StdTestEpRet |             294 |
|      MaxTestEpRet |        1.19e+03 |
|      MinTestEpRet |             398 |
|             EpLen |             520 |
|         TestEpLen |             660 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             102 |
|         StdQ1Vals |            26.3 |
|         MaxQ1Vals |             135 |
|         MinQ1Vals |           -39.6 |
|     AverageQ2Vals |             102 |
|         StdQ2Vals |            26.3 |
|         MaxQ2Vals |             137 |
|         MinQ2Vals |           -47.1 |
|            LossPi |            -104 |
|             LossQ |            13.2 |
|              Time |        3.79e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |             293 |
|          StdEpRet |             121 |
|          MaxEpRet |             523 |
|          MinEpRet |             122 |
|  AverageTestEpRet |             185 |
|      StdTestEpRet |            35.7 |
|      MaxTestEpRet |             256 |
|      MinTestEpRet |             160 |
|             EpLen |             187 |
|         TestEpLen |             106 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             109 |
|         StdQ1Vals |            28.6 |
|         MaxQ1Vals |             145 |
|         MinQ1Vals |           -60.3 |
|     AverageQ2Vals |             109 |
|         StdQ2Vals |            28.6 |
|         MaxQ2Vals |             145 |
|         MinQ2Vals |           -68.2 |
|            LossPi |            -111 |
|             LossQ |            15.7 |
|              Time |         4.8e+03 |
---------------------------------------


In [35]:
args = {'env': 'AntMuJoCoEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"AntMuJoCoEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D2880FD08>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_True",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\us

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             339 |
|          StdEpRet |            39.4 |
|          MaxEpRet |             405 |
|          MinEpRet |             299 |
|  AverageTestEpRet |             316 |
|      StdTestEpRet |            3.87 |
|      MaxTestEpRet |             319 |
|      MinTestEpRet |             309 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            44.8 |
|         StdQ1Vals |            7.84 |
|         MaxQ1Vals |            66.5 |
|         MinQ1Vals |            16.1 |
|     AverageQ2Vals |            44.8 |
|         StdQ2Vals |            7.84 |
|         MaxQ2Vals |            66.7 |
|         MinQ2Vals |            15.9 |
|            LossPi |             -45 |
|             LossQ |           0.102 |
|              Time |             868 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |             602 |
|          StdEpRet |            75.8 |
|          MaxEpRet |             695 |
|          MinEpRet |             496 |
|  AverageTestEpRet |             476 |
|      StdTestEpRet |             108 |
|      MaxTestEpRet |             677 |
|      MinTestEpRet |             353 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            60.9 |
|         StdQ1Vals |            9.11 |
|         MaxQ1Vals |            79.7 |
|         MinQ1Vals |            35.6 |
|     AverageQ2Vals |            60.9 |
|         StdQ2Vals |            9.11 |
|         MaxQ2Vals |            79.6 |
|         MinQ2Vals |            35.2 |
|            LossPi |           -61.7 |
|             LossQ |           0.167 |
|              Time |        2.05e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             740 |
|          StdEpRet |            50.1 |
|          MaxEpRet |             811 |
|          MinEpRet |             673 |
|  AverageTestEpRet |             748 |
|      StdTestEpRet |            39.3 |
|      MaxTestEpRet |             797 |
|      MinTestEpRet |             689 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            64.7 |
|         StdQ1Vals |            6.33 |
|         MaxQ1Vals |            81.4 |
|         MinQ1Vals |            41.9 |
|     AverageQ2Vals |            64.7 |
|         StdQ2Vals |            6.33 |
|         MaxQ2Vals |            81.1 |
|         MinQ2Vals |            41.8 |
|            LossPi |           -65.4 |
|             LossQ |           0.345 |
|              Time |        3.21e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |             872 |
|          StdEpRet |             212 |
|          MaxEpRet |        1.03e+03 |
|          MinEpRet |             512 |
|  AverageTestEpRet |             928 |
|      StdTestEpRet |             181 |
|      MaxTestEpRet |        1.12e+03 |
|      MinTestEpRet |             584 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |            73.3 |
|         StdQ1Vals |            10.9 |
|         MaxQ1Vals |             104 |
|         MinQ1Vals |            22.4 |
|     AverageQ2Vals |            73.3 |
|         StdQ2Vals |            10.9 |
|         MaxQ2Vals |             104 |
|         MinQ2Vals |            22.5 |
|            LossPi |           -74.2 |
|             LossQ |           0.844 |
|              Time |        4.38e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        1.01e+03 |
|          StdEpRet |             133 |
|          MaxEpRet |        1.13e+03 |
|          MinEpRet |             796 |
|  AverageTestEpRet |             997 |
|      StdTestEpRet |             135 |
|      MaxTestEpRet |        1.12e+03 |
|      MinTestEpRet |             741 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |            83.5 |
|         StdQ1Vals |            10.8 |
|         MaxQ1Vals |             108 |
|         MinQ1Vals |            35.1 |
|     AverageQ2Vals |            83.5 |
|         StdQ2Vals |            10.8 |
|         MaxQ2Vals |             107 |
|         MinQ2Vals |            33.1 |
|            LossPi |           -84.2 |
|             LossQ |            0.71 |
|              Time |        5.56e+03 |
---------------------------------------


In [34]:
args = {'env': 'AntMuJoCoEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': False,
        'exp_name': 'td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_False'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_False\td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_False_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"AntMuJoCoEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_False",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D1E23EAC8>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_Ant_NoTargSmooth_NoDelayUpdate_POMDP_False",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             321 |
|          StdEpRet |              40 |
|          MaxEpRet |             386 |
|          MinEpRet |             276 |
|  AverageTestEpRet |             273 |
|      StdTestEpRet |            31.9 |
|      MaxTestEpRet |             294 |
|      MinTestEpRet |             210 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |             130 |
|         StdQ1Vals |            10.8 |
|         MaxQ1Vals |             162 |
|         MinQ1Vals |            95.7 |
|     AverageQ2Vals |             130 |
|         StdQ2Vals |            10.8 |
|         MaxQ2Vals |             161 |
|         MinQ2Vals |            95.6 |
|            LossPi |            -131 |
|             LossQ |           0.676 |
|              Time |             805 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |             679 |
|          StdEpRet |              49 |
|          MaxEpRet |             758 |
|          MinEpRet |             631 |
|  AverageTestEpRet |             777 |
|      StdTestEpRet |            69.2 |
|      MaxTestEpRet |             834 |
|      MinTestEpRet |             644 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            85.5 |
|         StdQ1Vals |            6.44 |
|         MaxQ1Vals |             107 |
|         MinQ1Vals |            45.7 |
|     AverageQ2Vals |            85.5 |
|         StdQ2Vals |            6.44 |
|         MaxQ2Vals |             107 |
|         MinQ2Vals |            45.2 |
|            LossPi |           -86.1 |
|             LossQ |           0.644 |
|              Time |        1.96e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             779 |
|          StdEpRet |             125 |
|          MaxEpRet |             989 |
|          MinEpRet |             670 |
|  AverageTestEpRet |             828 |
|      StdTestEpRet |             128 |
|      MaxTestEpRet |        1.03e+03 |
|      MinTestEpRet |             641 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |              91 |
|         StdQ1Vals |             8.9 |
|         MaxQ1Vals |             135 |
|         MinQ1Vals |            41.8 |
|     AverageQ2Vals |              91 |
|         StdQ2Vals |             8.9 |
|         MaxQ2Vals |             136 |
|         MinQ2Vals |            41.8 |
|            LossPi |           -91.7 |
|             LossQ |            1.27 |
|              Time |        3.12e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |         1.4e+03 |
|          StdEpRet |            66.3 |
|          MaxEpRet |        1.52e+03 |
|          MinEpRet |        1.35e+03 |
|  AverageTestEpRet |        1.27e+03 |
|      StdTestEpRet |             455 |
|      MaxTestEpRet |        1.61e+03 |
|      MinTestEpRet |             386 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             103 |
|         StdQ1Vals |            13.1 |
|         MaxQ1Vals |             182 |
|         MinQ1Vals |            47.7 |
|     AverageQ2Vals |             103 |
|         StdQ2Vals |            13.1 |
|         MaxQ2Vals |             181 |
|         MinQ2Vals |            48.9 |
|            LossPi |            -104 |
|             LossQ |            2.78 |
|              Time |        4.48e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        1.51e+03 |
|          StdEpRet |             140 |
|          MaxEpRet |        1.73e+03 |
|          MinEpRet |        1.34e+03 |
|  AverageTestEpRet |         1.7e+03 |
|      StdTestEpRet |              89 |
|      MaxTestEpRet |         1.8e+03 |
|      MinTestEpRet |        1.54e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             113 |
|         StdQ1Vals |            22.9 |
|         MaxQ1Vals |             172 |
|         MinQ1Vals |            26.7 |
|     AverageQ2Vals |             113 |
|         StdQ2Vals |            22.9 |
|         MaxQ2Vals |             171 |
|         MinQ2Vals |            27.2 |
|            LossPi |            -114 |
|             LossQ |            4.07 |
|              Time |         5.7e+03 |
---------------------------------------


In [32]:
args = {'env': 'HalfCheetahMuJoCoEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': False,
        'exp_name': 'td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False\td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetahMuJoCoEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D20582508>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_False_HalfCheetah_NoTargSmooth_NoDelayUpdate_POMDP_False",
            "first_row":	true,
            "log_current_row":	{},
            "log_hea

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |            -545 |
|          StdEpRet |            6.51 |
|          MaxEpRet |            -535 |
|          MinEpRet |            -551 |
|  AverageTestEpRet |            -592 |
|      StdTestEpRet |            1.29 |
|      MaxTestEpRet |            -591 |
|      MinTestEpRet |            -594 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |             207 |
|         StdQ1Vals |            17.1 |
|         MaxQ1Vals |             266 |
|         MinQ1Vals |             154 |
|     AverageQ2Vals |             207 |
|         StdQ2Vals |            17.1 |
|         MaxQ2Vals |             267 |
|         MinQ2Vals |             155 |
|            LossPi |            -208 |
|             LossQ |            3.02 |
|              Time |             734 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |        1.29e+03 |
|          StdEpRet |            55.3 |
|          MaxEpRet |        1.35e+03 |
|          MinEpRet |        1.21e+03 |
|  AverageTestEpRet |        1.39e+03 |
|      StdTestEpRet |            90.3 |
|      MaxTestEpRet |         1.5e+03 |
|      MinTestEpRet |        1.24e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            92.1 |
|         StdQ1Vals |            12.7 |
|         MaxQ1Vals |             127 |
|         MinQ1Vals |            52.8 |
|     AverageQ2Vals |            92.1 |
|         StdQ2Vals |            12.7 |
|         MaxQ2Vals |             127 |
|         MinQ2Vals |            51.8 |
|            LossPi |           -94.3 |
|             LossQ |            2.62 |
|              Time |        1.74e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |        1.59e+03 |
|          StdEpRet |            88.2 |
|          MaxEpRet |        1.66e+03 |
|          MinEpRet |        1.44e+03 |
|  AverageTestEpRet |        1.63e+03 |
|      StdTestEpRet |            46.6 |
|      MaxTestEpRet |        1.69e+03 |
|      MinTestEpRet |        1.56e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |             131 |
|         StdQ1Vals |            24.4 |
|         MaxQ1Vals |             171 |
|         MinQ1Vals |           -23.8 |
|     AverageQ2Vals |             131 |
|         StdQ2Vals |            24.4 |
|         MaxQ2Vals |             172 |
|         MinQ2Vals |           -20.8 |
|            LossPi |            -132 |
|             LossQ |            4.11 |
|              Time |        2.78e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |        1.93e+03 |
|          StdEpRet |            29.9 |
|          MaxEpRet |        1.98e+03 |
|          MinEpRet |        1.91e+03 |
|  AverageTestEpRet |        1.88e+03 |
|      StdTestEpRet |              27 |
|      MaxTestEpRet |         1.9e+03 |
|      MinTestEpRet |        1.83e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             163 |
|         StdQ1Vals |            26.6 |
|         MaxQ1Vals |             203 |
|         MinQ1Vals |              42 |
|     AverageQ2Vals |             163 |
|         StdQ2Vals |            26.6 |
|         MaxQ2Vals |             202 |
|         MinQ2Vals |            33.6 |
|            LossPi |            -165 |
|             LossQ |            4.31 |
|              Time |        3.82e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        1.97e+03 |
|          StdEpRet |              45 |
|          MaxEpRet |        2.01e+03 |
|          MinEpRet |        1.89e+03 |
|  AverageTestEpRet |        1.92e+03 |
|      StdTestEpRet |            12.6 |
|      MaxTestEpRet |        1.94e+03 |
|      MinTestEpRet |        1.91e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             177 |
|         StdQ1Vals |            44.1 |
|         MaxQ1Vals |             219 |
|         MinQ1Vals |           -90.3 |
|     AverageQ2Vals |             177 |
|         StdQ2Vals |            44.1 |
|         MaxQ2Vals |             220 |
|         MinQ2Vals |           -86.3 |
|            LossPi |            -179 |
|             LossQ |            6.58 |
|              Time |        4.88e+03 |
---------------------------------------


In [31]:
args = {'env': 'HalfCheetahMuJoCoEnv-v0', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':False,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetahMuJoCoEnv-v0",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D1073C388>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\lingheng\\google drive\

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |            -334 |
|          StdEpRet |            87.6 |
|          MaxEpRet |            -245 |
|          MinEpRet |            -474 |
|  AverageTestEpRet |            -326 |
|      StdTestEpRet |            43.8 |
|      MaxTestEpRet |            -252 |
|      MinTestEpRet |            -381 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            26.4 |
|         StdQ1Vals |            11.7 |
|         MaxQ1Vals |            64.2 |
|         MinQ1Vals |           -18.5 |
|     AverageQ2Vals |            26.4 |
|         StdQ2Vals |            11.7 |
|         MaxQ2Vals |            65.5 |
|         MinQ2Vals |           -18.5 |
|            LossPi |           -27.3 |
|             LossQ |           0.603 |
|              Time |        1.05e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |            48.1 |
|          StdEpRet |            43.7 |
|          MaxEpRet |             116 |
|          MinEpRet |            -3.2 |
|  AverageTestEpRet |           -31.1 |
|      StdTestEpRet |            15.4 |
|      MaxTestEpRet |           -7.14 |
|      MinTestEpRet |             -52 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            6.73 |
|         StdQ1Vals |            7.08 |
|         MaxQ1Vals |            22.5 |
|         MinQ1Vals |             -26 |
|     AverageQ2Vals |            6.73 |
|         StdQ2Vals |            7.08 |
|         MaxQ2Vals |            22.5 |
|         MinQ2Vals |             -26 |
|            LossPi |           -7.54 |
|             LossQ |           0.487 |
|              Time |         2.2e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             150 |
|          StdEpRet |             411 |
|          MaxEpRet |             401 |
|          MinEpRet |            -563 |
|  AverageTestEpRet |             221 |
|      StdTestEpRet |             381 |
|      MaxTestEpRet |             441 |
|      MinTestEpRet |            -538 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            15.1 |
|         StdQ1Vals |            8.01 |
|         MaxQ1Vals |            32.9 |
|         MinQ1Vals |           -81.2 |
|     AverageQ2Vals |            15.1 |
|         StdQ2Vals |            8.01 |
|         MaxQ2Vals |              32 |
|         MinQ2Vals |           -88.3 |
|            LossPi |           -15.8 |
|             LossQ |           0.628 |
|              Time |        3.28e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |             321 |
|          StdEpRet |            40.4 |
|          MaxEpRet |             369 |
|          MinEpRet |             258 |
|  AverageTestEpRet |             412 |
|      StdTestEpRet |            29.1 |
|      MaxTestEpRet |             456 |
|      MinTestEpRet |             370 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |            24.5 |
|         StdQ1Vals |            9.54 |
|         MaxQ1Vals |            38.6 |
|         MinQ1Vals |           -66.5 |
|     AverageQ2Vals |            24.5 |
|         StdQ2Vals |            9.54 |
|         MaxQ2Vals |            39.1 |
|         MinQ2Vals |             -67 |
|            LossPi |           -25.2 |
|             LossQ |           0.833 |
|              Time |        4.54e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |             354 |
|          StdEpRet |             123 |
|          MaxEpRet |             551 |
|          MinEpRet |             213 |
|  AverageTestEpRet |             355 |
|      StdTestEpRet |            28.9 |
|      MaxTestEpRet |             400 |
|      MinTestEpRet |             312 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |            27.7 |
|         StdQ1Vals |            11.5 |
|         MaxQ1Vals |            40.7 |
|         MinQ1Vals |           -55.2 |
|     AverageQ2Vals |            27.7 |
|         StdQ2Vals |            11.5 |
|         MaxQ2Vals |            41.1 |
|         MinQ2Vals |           -56.3 |
|            LossPi |           -28.2 |
|             LossQ |           0.965 |
|              Time |         5.8e+03 |
---------------------------------------


In [17]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':True,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': False,
        'exp_name': 'td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetah-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D20E52588>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\lingheng\\google drive\\git_repo

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |             522 |
|          StdEpRet |             229 |
|          MaxEpRet |             744 |
|          MinEpRet |             209 |
|  AverageTestEpRet |        1.39e+03 |
|      StdTestEpRet |             422 |
|      MaxTestEpRet |        1.78e+03 |
|      MinTestEpRet |             769 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            32.6 |
|         StdQ1Vals |            22.6 |
|         MaxQ1Vals |            81.6 |
|         MinQ1Vals |           -30.4 |
|     AverageQ2Vals |            32.6 |
|         StdQ2Vals |            22.6 |
|         MaxQ2Vals |            81.4 |
|         MinQ2Vals |           -28.9 |
|            LossPi |             -34 |
|             LossQ |            5.86 |
|              Time |             748 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |        1.75e+03 |
|          StdEpRet |             618 |
|          MaxEpRet |        2.38e+03 |
|          MinEpRet |        1.08e+03 |
|  AverageTestEpRet |        2.56e+03 |
|      StdTestEpRet |            88.6 |
|      MaxTestEpRet |        2.68e+03 |
|      MinTestEpRet |        2.36e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |             111 |
|         StdQ1Vals |            78.7 |
|         MaxQ1Vals |             227 |
|         MinQ1Vals |           -62.9 |
|     AverageQ2Vals |             111 |
|         StdQ2Vals |            78.7 |
|         MaxQ2Vals |             227 |
|         MinQ2Vals |             -64 |
|            LossPi |            -113 |
|             LossQ |            17.8 |
|              Time |        1.76e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |        2.56e+03 |
|          StdEpRet |             783 |
|          MaxEpRet |        3.12e+03 |
|          MinEpRet |        1.21e+03 |
|  AverageTestEpRet |        3.23e+03 |
|      StdTestEpRet |            84.4 |
|      MaxTestEpRet |        3.39e+03 |
|      MinTestEpRet |         3.1e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |             179 |
|         StdQ1Vals |            98.7 |
|         MaxQ1Vals |             308 |
|         MinQ1Vals |           -68.1 |
|     AverageQ2Vals |             179 |
|         StdQ2Vals |            98.7 |
|         MaxQ2Vals |             307 |
|         MinQ2Vals |           -73.5 |
|            LossPi |            -181 |
|             LossQ |            29.2 |
|              Time |        2.76e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |        3.54e+03 |
|          StdEpRet |             166 |
|          MaxEpRet |        3.68e+03 |
|          MinEpRet |        3.26e+03 |
|  AverageTestEpRet |        3.85e+03 |
|      StdTestEpRet |            95.6 |
|      MaxTestEpRet |        4.05e+03 |
|      MinTestEpRet |        3.71e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             241 |
|         StdQ1Vals |             106 |
|         MaxQ1Vals |             365 |
|         MinQ1Vals |           -83.7 |
|     AverageQ2Vals |             241 |
|         StdQ2Vals |             106 |
|         MaxQ2Vals |             363 |
|         MinQ2Vals |           -89.6 |
|            LossPi |            -243 |
|             LossQ |            41.2 |
|              Time |        3.72e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        2.82e+03 |
|          StdEpRet |        1.13e+03 |
|          MaxEpRet |        4.04e+03 |
|          MinEpRet |        1.35e+03 |
|  AverageTestEpRet |        4.39e+03 |
|      StdTestEpRet |             160 |
|      MaxTestEpRet |        4.52e+03 |
|      MinTestEpRet |        4.04e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             294 |
|         StdQ1Vals |             115 |
|         MaxQ1Vals |             420 |
|         MinQ1Vals |            -119 |
|     AverageQ2Vals |             294 |
|         StdQ2Vals |             115 |
|         MaxQ2Vals |             421 |
|         MinQ2Vals |           -85.2 |
|            LossPi |            -296 |
|             LossQ |            47.6 |
|              Time |        4.66e+03 |
---------------------------------------


In [15]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':True,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'partially_observable': True,
        'exp_name': 'td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
    partially_observable=args['partially_observable'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetah-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000024D209EEF48>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\lingheng\\google drive\\git_repo

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |            -496 |
|          StdEpRet |            88.8 |
|          MaxEpRet |            -402 |
|          MinEpRet |            -603 |
|  AverageTestEpRet |            -339 |
|      StdTestEpRet |             282 |
|      MaxTestEpRet |            22.8 |
|      MinTestEpRet |            -586 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            12.9 |
|         StdQ1Vals |            4.81 |
|         MaxQ1Vals |            27.6 |
|         MinQ1Vals |          -0.277 |
|     AverageQ2Vals |            12.9 |
|         StdQ2Vals |            4.81 |
|         MaxQ2Vals |              28 |
|         MinQ2Vals |          -0.261 |
|            LossPi |             -14 |
|             LossQ |           0.327 |
|              Time |             674 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |            46.1 |
|          StdEpRet |            34.4 |
|          MaxEpRet |            91.8 |
|          MinEpRet |           0.707 |
|  AverageTestEpRet |             184 |
|      StdTestEpRet |             210 |
|      MaxTestEpRet |             535 |
|      MinTestEpRet |           -4.61 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |            8.76 |
|         StdQ1Vals |            10.9 |
|         MaxQ1Vals |            26.4 |
|         MinQ1Vals |           -11.7 |
|     AverageQ2Vals |            8.76 |
|         StdQ2Vals |            10.9 |
|         MaxQ2Vals |            26.1 |
|         MinQ2Vals |             -12 |
|            LossPi |           -9.39 |
|             LossQ |           0.251 |
|              Time |        1.61e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |             434 |
|          StdEpRet |             300 |
|          MaxEpRet |             953 |
|          MinEpRet |             245 |
|  AverageTestEpRet |             525 |
|      StdTestEpRet |             197 |
|      MaxTestEpRet |             839 |
|      MinTestEpRet |             239 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |            42.9 |
|         StdQ1Vals |            28.7 |
|         MaxQ1Vals |            86.7 |
|         MinQ1Vals |           -21.4 |
|     AverageQ2Vals |            42.9 |
|         StdQ2Vals |            28.7 |
|         MaxQ2Vals |            86.4 |
|         MinQ2Vals |           -21.7 |
|            LossPi |           -44.1 |
|             LossQ |            2.73 |
|              Time |        2.54e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |             519 |
|          StdEpRet |             340 |
|          MaxEpRet |             993 |
|          MinEpRet |            42.5 |
|  AverageTestEpRet |             938 |
|      StdTestEpRet |             360 |
|      MaxTestEpRet |        1.39e+03 |
|      MinTestEpRet |             305 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |              59 |
|         StdQ1Vals |            42.1 |
|         MaxQ1Vals |             118 |
|         MinQ1Vals |           -1.48 |
|     AverageQ2Vals |              59 |
|         StdQ2Vals |            42.1 |
|         MaxQ2Vals |             118 |
|         MinQ2Vals |           -2.36 |
|            LossPi |           -60.2 |
|             LossQ |            4.08 |
|              Time |        3.49e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |             684 |
|          StdEpRet |             278 |
|          MaxEpRet |        1.13e+03 |
|          MinEpRet |             391 |
|  AverageTestEpRet |             742 |
|      StdTestEpRet |             409 |
|      MaxTestEpRet |        1.36e+03 |
|      MinTestEpRet |             295 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |            63.5 |
|         StdQ1Vals |            47.1 |
|         MaxQ1Vals |             123 |
|         MinQ1Vals |           0.149 |
|     AverageQ2Vals |            63.5 |
|         StdQ2Vals |            47.1 |
|         MaxQ2Vals |             123 |
|         MinQ2Vals |          -0.451 |
|            LossPi |           -64.5 |
|             LossQ |            4.89 |
|              Time |        4.45e+03 |
---------------------------------------


In [11]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':True,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'exp_name': 'td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate\td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_name":	"HalfCheetah-v2",
    "epochs":	50,
    "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
    "gamma":	0.99,
    "gravity_change_pattern":	"gravity_averagely_equal",
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000021F94475D48>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_NonStationary_HalfCheetah_NoTargSmooth_NoDelayUpdate",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\lingheng\\google drive\\git_repo

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |        1.18e+03 |
|          StdEpRet |             285 |
|          MaxEpRet |        1.63e+03 |
|          MinEpRet |             913 |
|  AverageTestEpRet |        1.78e+03 |
|      StdTestEpRet |             108 |
|      MaxTestEpRet |        1.96e+03 |
|      MinTestEpRet |         1.6e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            29.5 |
|         StdQ1Vals |            26.3 |
|         MaxQ1Vals |             103 |
|         MinQ1Vals |           -26.8 |
|     AverageQ2Vals |            29.5 |
|         StdQ2Vals |            26.3 |
|         MaxQ2Vals |             103 |
|         MinQ2Vals |           -28.7 |
|            LossPi |           -30.8 |
|             LossQ |            6.48 |
|              Time |             820 |
---------------------------------------


---------------------------------------
|             Epoch |              16 |
|      AverageEpRet |        3.56e+03 |
|          StdEpRet |            78.1 |
|          MaxEpRet |        3.67e+03 |
|          MinEpRet |        3.47e+03 |
|  AverageTestEpRet |        3.48e+03 |
|      StdTestEpRet |            43.5 |
|      MaxTestEpRet |        3.56e+03 |
|      MinTestEpRet |         3.4e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         6.4e+04 |
|     AverageQ1Vals |             196 |
|         StdQ1Vals |            93.3 |
|         MaxQ1Vals |             320 |
|         MinQ1Vals |           -10.8 |
|     AverageQ2Vals |             196 |
|         StdQ2Vals |            93.3 |
|         MaxQ2Vals |             321 |
|         MinQ2Vals |           -16.2 |
|            LossPi |            -198 |
|             LossQ |            17.3 |
|              Time |         1.9e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              25 |
|      AverageEpRet |         4.6e+03 |
|          StdEpRet |             106 |
|          MaxEpRet |        4.76e+03 |
|          MinEpRet |        4.49e+03 |
|  AverageTestEpRet |        4.81e+03 |
|      StdTestEpRet |             107 |
|      MaxTestEpRet |        4.97e+03 |
|      MinTestEpRet |         4.6e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |           1e+05 |
|     AverageQ1Vals |             333 |
|         StdQ1Vals |            85.6 |
|         MaxQ1Vals |             451 |
|         MinQ1Vals |            52.2 |
|     AverageQ2Vals |             333 |
|         StdQ2Vals |            85.6 |
|         MaxQ2Vals |             451 |
|         MinQ2Vals |            43.1 |
|            LossPi |            -336 |
|             LossQ |            33.1 |
|              Time |        2.96e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              34 |
|      AverageEpRet |        4.99e+03 |
|          StdEpRet |            73.2 |
|          MaxEpRet |        5.09e+03 |
|          MinEpRet |        4.89e+03 |
|  AverageTestEpRet |        5.77e+03 |
|      StdTestEpRet |            70.2 |
|      MaxTestEpRet |        5.85e+03 |
|      MinTestEpRet |        5.63e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.36e+05 |
|     AverageQ1Vals |             425 |
|         StdQ1Vals |            94.2 |
|         MaxQ1Vals |             537 |
|         MinQ1Vals |            -161 |
|     AverageQ2Vals |             425 |
|         StdQ2Vals |            94.2 |
|         MaxQ2Vals |             538 |
|         MinQ2Vals |            -149 |
|            LossPi |            -427 |
|             LossQ |            32.8 |
|              Time |        4.05e+03 |
---------------------------------------


---------------------------------------
|             Epoch |              43 |
|      AverageEpRet |        5.22e+03 |
|          StdEpRet |             330 |
|          MaxEpRet |         5.5e+03 |
|          MinEpRet |        4.69e+03 |
|  AverageTestEpRet |        6.18e+03 |
|      StdTestEpRet |            69.8 |
|      MaxTestEpRet |        6.28e+03 |
|      MinTestEpRet |        6.04e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |        1.72e+05 |
|     AverageQ1Vals |             478 |
|         StdQ1Vals |             102 |
|         MaxQ1Vals |             594 |
|         MinQ1Vals |           -84.1 |
|     AverageQ2Vals |             478 |
|         StdQ2Vals |             102 |
|         MaxQ2Vals |             593 |
|         MinQ2Vals |             -89 |
|            LossPi |            -480 |
|             LossQ |            37.7 |
|              Time |        5.14e+03 |
---------------------------------------


In [8]:
args = {'env': 'HalfCheetah-v2', 'hid': 256, 'l': 2, 'gamma': 0.99, 
        'seed': 0, 'epochs': 50,
        'nonstationary_env':True,
        'gravity_change_pattern': 'gravity_averagely_equal',
        'exp_name': 'td3_HalfCheetah_NoTargSmooth_NoDelayUpdate'}

from spinup.utils.run_utils import setup_logger_kwargs
logger_kwargs = setup_logger_kwargs(args['exp_name'], args['seed'])

td3(env_name=args['env'], actor_critic=core.MLPActorCritic,
     ac_kwargs=dict(hidden_sizes=[args['hid']]*args['l']), 
     gamma=args['gamma'], seed=args['seed'], epochs=args['epochs'],
    nonstationary_env=args['nonstationary_env'], 
    gravity_change_pattern=args['gravity_change_pattern'],
     logger_kwargs=logger_kwargs)

[32;1mLogging data to c:\users\lingheng\google drive\git_repos\spinningup-new\data\td3_HalfCheetah_NoTargSmooth_NoDelayUpdate\td3_HalfCheetah_NoTargSmooth_NoDelayUpdate_s0\progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            256,
            256
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x0000021F944728B8>",
    "epochs":	50,
    "exp_name":	"td3_HalfCheetah_NoTargSmooth_NoDelayUpdate",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x0000021FDE4066C8>":	{
            "epoch_dict":	{},
            "exp_name":	"td3_HalfCheetah_NoTargSmooth_NoDelayUpdate",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"c:\\users\\lingheng\\google drive\\git_repos\\spinningup-new\\data\\td3_HalfCheetah_NoTargSmooth_NoDelayUpdate\\td3_HalfCheetah_NoT

---------------------------------------
|             Epoch |               7 |
|      AverageEpRet |        2.45e+03 |
|          StdEpRet |             793 |
|          MaxEpRet |        3.08e+03 |
|          MinEpRet |        1.09e+03 |
|  AverageTestEpRet |        3.15e+03 |
|      StdTestEpRet |             163 |
|      MaxTestEpRet |        3.43e+03 |
|      MinTestEpRet |        2.91e+03 |
|             EpLen |           1e+03 |
|         TestEpLen |           1e+03 |
| TotalEnvInteracts |         2.8e+04 |
|     AverageQ1Vals |            70.7 |
|         StdQ1Vals |            40.3 |
|         MaxQ1Vals |             156 |
|         MinQ1Vals |             -58 |
|     AverageQ2Vals |            70.7 |
|         StdQ2Vals |            40.3 |
|         MaxQ2Vals |             156 |
|         MinQ2Vals |           -54.4 |
|            LossPi |           -72.9 |
|             LossQ |              12 |
|              Time |             847 |
---------------------------------------


KeyboardInterrupt: 