In [10]:
!python -m pip install torch --quiet
!python -m pip install gym stable-baselines3[extra] python-dotenv fsspec["s3"] boto3 s3fs==2022.11.0 tensorboard --quiet

In [1]:
%load_ext dotenv
%dotenv env

In [2]:
import numpy as np
import torch
torch.set_printoptions(precision=4, linewidth=200, sci_mode=False)
np.set_printoptions(precision=4, linewidth=200, suppress=True)

USER_INDEX = 1
SESSION_INDEX = 2
TIMESTAMP_INDEX = 11
TRAIN_SPLIT = 0.7
EVAL_SPLIT = 0.15


In [3]:
# %load npz_extractor.py
import logging
import os
import zipfile

import numpy as np
import logging 

class NPZExtractor:
    logger = logging.getLogger(__name__)
    def __init__(self, input_path, n_files, n_sequences, s3_client, data_partition) -> None:
        self.input_path = input_path
        self.n_files = n_files
        self.n_sequences = n_sequences
        self.s3_client = s3_client
        self.data_partition = data_partition


    def get_dataset_pointer(self):

        read_path = os.path.join(self.input_path, f'files_used_{self.n_files}')
        if not os.path.exists(read_path):
            print(f'Creating directory: {read_path}')
            os.makedirs(read_path)


        for _ in range(0, self.n_sequences +1, 10):
            key_zip, key_npy = (
                os.path.join(read_path, f'sequence_index_{_}.npz'),
                os.path.join(read_path, f'sequence_index_{_}')
            )

            self.logger.info(f'Loading pointer to dataset: {key_npy}: derived from {key_zip}')

            if not os.path.exists(key_npy):
                self.logger.info(f'Zip file to extract: {key_zip}: npy file to load: {key_npy}')
                # self.s3_client.download_file(
                #     'dissertation-data-dmiller',
                #     key_zip,
                #     key_zip
                # )
                self.logger.info(f'Zip file downloaded: {key_zip}')
                self._zip_extract(key_zip, key_npy)

        lz_concatenated_results = self._lazy_concatenate()

        if self.data_partition:
            return [p[:self.data_partition] for p in lz_concatenated_results]
        else:
            return lz_concatenated_results


    def _zip_extract(self, key_zip, key_npy):
        self.logger.info(f'Extracting file: {key_zip} -> {key_npy}')

        with zipfile.ZipFile(key_zip, 'r') as zip_ref:
            zip_ref.extractall(path=key_npy, members=['arr_0.npy'])


        self.logger.info(f'Zip file exracted: {key_zip} -> {key_npy}/arr_0.npy')

    def _lazy_concatenate(self):
        lz_concat = []
        for _ in range(0, self.n_sequences +1, 10):
            path_to_load = os.path.join(self.input_path, f'files_used_{self.n_files}', f'sequence_index_{_}', f'arr_0.npy')
            self.logger.info(f'Loading: {path_to_load}')
            lz_concat.append(np.load(path_to_load))
        return lz_concat

In [4]:
# %load callback
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.logger import TensorBoardOutputFormat
import numpy as np

class DistributionCallback(BaseCallback):

    def _on_training_start(self) -> None:
        self._log_freq = 100
        output_formats = self.logger.output_formats
        self.tb_formatter = next(f for f in output_formats if isinstance(f, TensorBoardOutputFormat))

    
    def _on_step(self) -> bool:
        if self.n_calls % self._log_freq == 0:
            dist_list = self.training_env.env_method('dists')
            dists = np.concatenate(dist_list, axis=1) 
            try:
                self.tb_formatter.writer.add_histogram('incentive_index', dists[:, 0], int(self.n_calls / self._log_freq))
                self.tb_formatter.writer.add_histogram('distance_session_end', dists[:, 1], int(self.n_calls / self._log_freq))
                self.tb_formatter.writer.add_histogram('distance_incentive_allocated', dists[:, 2], int(self.n_calls / self._log_freq))
                self.tb_formatter.writer.flush()
    
            except Exception as e:
                print(e)




In [5]:
# %load environment
import gym
import numpy as np

USER_INDEX = 1
SESSION_INDEX = 2
TASK_INDEX = 3

N_EVENT_INDEX = -1

USER_IN_SESSION_INDEX = 0
SESSION_COUNT_INDEX = 1
TASK_IN_SESSION_INDEX = 2
REWARD_ALLOCATED_INDEX = 3

SESSION_FINISHED_INDEX = -1

CUM_PLATFORM_TIME_INDEX = 4
METADATA_INDEX = 12
import logging
from scipy.stats import norm 


class CitizenScienceEnv(gym.Env):
    
    logger = logging.getLogger(__name__) 
    metadata = {'render.modes': ['human']}
    
    def __init__(self, user_sessions, experience_dataset, n_sequences, n_features) -> None:
        """
        trajectories: dictionary of user_id to their respective trajectories.
        n_sequences: number of sequences used for preprocessing.
        n_features: number of features used for preprocessing.
        """
        super(CitizenScienceEnv, self).__init__()
        self.user_sessions = user_sessions
        self.experience_dataset = experience_dataset

        self.action_space = gym.spaces.Discrete(2)
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(n_sequences + 1, n_features), dtype=np.float32)
        self.n_sequences = n_sequences
        self.n_features = n_features
        self.current_session = None
        
    def _extract_features(self, feature_array):
        
        metadata, features = feature_array[:, :METADATA_INDEX], feature_array[:, METADATA_INDEX:]
        features = features.reshape((features.shape[0], self.n_sequences + 1, self.n_features))
        features = np.flip(features, axis=1).squeeze(0)
        return metadata.squeeze(0), features

    def _state(self, user, session, task_count):
        
        """
        get index of current state
        """ 
        current_state = self.experience_dataset[
            (self.experience_dataset[:, USER_INDEX] == user) &
            (self.experience_dataset[:, SESSION_INDEX] == session) &
            (self.experience_dataset[:, TASK_INDEX] == task_count)
        ]

        metadata, features = self._extract_features(current_state)
        cum_platform_time = metadata[CUM_PLATFORM_TIME_INDEX]
        return features, cum_platform_time

    
    def _seed_user_session(self):
        """
        find all users sessions that have not been completed
        select random user session from list
        """
        current_session = self.user_sessions[self.user_sessions['ended'] == 0].sample(1)
        current_session['task_index'] = 1
        current_session['total_reward'] = 0
        self.current_session = current_session
        
    def step(self, action):
        
        self._take_action(action)
            
        state, rewards, done, meta = self._calculate_next_state() 
        if not done:
            self._update_session_metadata(self.current_session)
        
        return state, rewards, done, meta

    def _update_session_metadata(self, current_session):
        self.user_sessions.loc[current_session.index] = current_session 
        
    def _calculate_next_state(self):
        
        next_state = self.current_session['task_index'] + 1
        extending = self._extending()
        if not extending:
            self.logger.debug(f'User: {self.current_session} has completed their session')
            self._user_session_terminate()
            if self.user_sessions['ended'].all():
                self.logger.debug('All users have completed their sessions')
                return None, self.user_sessions['total_reward'].sum().astype(float), True, {}
            
            self._seed_user_session()
            user, session, count = self.current_session[['user_id', 'session_id', 'task_index']].values[0]
            return (
                self._state(user, session, count)[0], 
                self.user_sessions['total_reward'].sum().astype(float),
                False,
                {}
            )
        self.logger.debug(f'User: {self.current_session} has moving to next state: {next_state}')
        self.current_session['task_index'] = next_state
        user, session, count = self.current_session[['user_id', 'session_id', 'task_index']].values[0]
        state, cum_platform_time = self._state(user, session, count)
        self.current_session['total_reward'] = cum_platform_time
        return (
            state,
            self.user_sessions['total_reward'].sum().astype(float),
            False,
            {}
        )
    
    
    def _extending(self):
        current_session = self.current_session.to_dict('records')[0]
        if current_session['task_index'] == current_session['counts']:
            return False
    
        if current_session['task_index'] <= current_session['sim_counts']:
            return True

        continue_session = self._probability_extending(current_session)
        return all([continue_session >= 0.3, continue_session < 0.9])
    
    
    def _probability_extending(self, current_session):
        if current_session['incentive_index'] == 0:
            return 0
        else:
            continue_session = norm(
                loc=current_session['incentive_index'],
                scale=5
            ).cdf(current_session['task_index']) + self._gaussian_noise()
       
        return continue_session
        
    def _gaussian_noise(self):
        return np.random.normal(0, 0.1, 100).sum() / 10
     
    def _user_session_terminate(self):
        self.current_session['ended'] = 1
        self._update_session_metadata(self.current_session)
    
    def _take_action(self, action):
        
        current_session = self.current_session.to_dict('records')[0]
        
        if current_session['incentive_index'] > 0 or action == 0:
            self.logger.debug(f'Incentive already allocation for session or no-op: {action}, {current_session}')
            return
        
    
        self.logger.debug('Taking action and allocating incentive')
        self.current_session['incentive_index'] = self.current_session['task_index']
        self.current_session['reward_allocated'] = action
        
        self.logger.debug('Taking action and allocating incentive: updating user session')
        self.logger.debug(f'User session: {self.current_session}')

    def reset(self):
        self.user_sessions = self.user_sessions.sample(frac=1)
        self.user_sessions['incentive_index'] = 0
        self.user_sessions['task_index'] = 0
        self.user_sessions['ended'] = 0
        self.user_sessions['total_reward'] = 0
        self.user_sessions['total_reward'] = self.user_sessions['total_reward'].astype(float)
        
        self._seed_user_session()
        self._update_session_metadata(self.current_session)
        user, session, count = self.current_session[['user_id', 'session_id', 'task_index']].values[0]
        return self._state(user, session, count)[0]
        
    
    def render(self, mode='human'):
        print('rendering')
        
    def dists(self):
        incentive_index = self.user_sessions['incentive_index'].values
        distance_end = (self.user_sessions['counts'] - self.user_sessions['incentive_index']).values
        distance_reward = (self.user_sessions['total_reward'] - self.user_sessions['incentive_index']).values
        return np.array([incentive_index, distance_end, distance_reward])


In [6]:
# %load incentive_reinforcement_learning_cpu.py
import argparse
import numpy as np
import torch
torch.set_printoptions(precision=4, linewidth=200, sci_mode=False)
np.set_printoptions(precision=4, linewidth=200, suppress=True)
from stable_baselines3.common.callbacks import EvalCallback, CallbackList, StopTrainingOnMaxEpisodes, CheckpointCallback
from stable_baselines3 import PPO, A2C
import logging
USER_INDEX = 1
SESSION_INDEX = 2
TIMESTAMP_INDEX = 11
TRAIN_SPLIT = 0.7
EVAL_SPLIT = 0.15
import pandas as pd
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import SubprocVecEnv, DummyVecEnv
from datetime import datetime
from stable_baselines3.common.vec_env import VecMonitor
from pprint import pformat
import os


logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)
np.set_printoptions(precision=4, linewidth=200, suppress=True)
torch.set_printoptions(precision=2, linewidth=200, sci_mode=False)


S3_BASELINE_PATH = 's3://dissertation-data-dmiller'

def train_eval_split(dataset, logger):
    train_split = int(dataset.shape[0] * TRAIN_SPLIT)
    eval_split = int(dataset.shape[0] * EVAL_SPLIT)
    test_split = dataset.shape[0] - train_split - eval_split
    logger.info(f'Train size: 0:{train_split}, eval size: {train_split}:{train_split+eval_split}: test size: {train_split + eval_split}:{dataset.shape[0]}')
    train_dataset, eval_dataset, test_split = dataset[:train_split], dataset[train_split:train_split+eval_split], dataset[train_split+eval_split:]
    
    return {
        'train': train_dataset,
        'eval': eval_dataset,
        'test': test_split
    }

def generate_metadata(dataset, logger):
     
    logger.info('Generating metadata tasks per session')
    sessions = pd.DataFrame(
        dataset[:, [USER_INDEX, SESSION_INDEX]],
        columns=['user_id', 'session_id']
    )
    
    sessions = sessions.groupby(['user_id', 'session_id']).size().reset_index(name='counts')
    sessions['sim_counts'] = (sessions['counts'] * 0.8).astype(int)
    sessions['sim_counts'] = sessions['sim_counts'].apply(lambda x: 1 if x == 0 else x)
    sessions['incentive_index'] = 0
    
    sessions['task_index'] = 0
    sessions['total_reward'] = 0
    sessions['total_reward'] = sessions['total_reward'].astype(float)
    sessions['ended'] = 0
    return sessions


def run_reinforcement_learning_incentives(environment, logger, n_episodes=1):
    for epoch in range(n_episodes):
        environment_comp = False
        state = environment.reset()
        i = 0
        while not environment_comp:
            next_action = (
                1 if np.random.uniform(low=0, high=1) > 0.8 else 0
            )
            state, rewards, environment_comp, meta = environment.step(next_action)
            i +=1
            if i % 100 == 0:
                logger.info(f'Step: {i} - Reward: {rewards}')
                
        logger.info(f'Epoch: {epoch} - Reward: {rewards}')
        print(environment.user_sessions.head(10))

    

def main(args):
    
    exec_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
    logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    
    
    read_path, n_files, n_sequences, n_features, n_episodes, device = (
        args.read_path, 
        args.n_files, 
        args.n_sequences, 
        args.n_features, 
        args.n_episodes, 
        args.device
    )
    
    npz_extractor = NPZExtractor(
        read_path,
        n_files,
        n_sequences,
        None,
        10000
    )
    
    cpu_count = os.cpu_count()
   
    logger.info(f'Starting experiment at {exec_time}') 
    logger.info(f'Extracting dataset from npz files to tensor' )
    dataset = np.concatenate(npz_extractor.get_dataset_pointer(), axis=1)
    datasets = train_eval_split(dataset, logger)
    train_data = datasets['train']
 
    logger.info(f'Dataset shape: {dataset.shape}: generating metadata tensor')
    sessions_train = generate_metadata(train_data, logger)
    logger.info(f'Metadata train: {sessions_train.shape}')
    logger.info(f'Creating vectorized training environment: num envs: {cpu_count}')
    

    citizen_science_vec = SubprocVecEnv([lambda: CitizenScienceEnv(sessions_train, train_data, n_sequences, n_features) for _ in range(2)])
    """
    Eval environment is not used in training and is used after training to evaluate the agent
    """    
    logger.info(f'Vectorized environments created, wrapping with monitor')
    
    base_path = os.path.join(
        S3_BASELINE_PATH,
        'reinforcement_learning_incentives',
        f'n_files_{n_files}',
        'results',
        exec_time,
    ) 
 
    tensorboard_dir, checkpoint_dir = (
        os.path.join(base_path, 'training_metrics'),
        os.path.join(base_path, 'checkpoints'),
    )
 
    monitor_train = VecMonitor(citizen_science_vec)
    agent = A2C(
        'MlpPolicy',
        monitor_train,
        verbose=1,
        device='cuda',
        tensorboard_log=tensorboard_dir,
    )

    checkpoint_callback = CheckpointCallback(
        save_freq=10000 // 2,
        name_prefix='a2c',
        save_path=checkpoint_dir,
        verbose=1
    )
        
    callback_max_episodes = StopTrainingOnMaxEpisodes(max_episodes=n_episodes, verbose=1)
    
    dist_callback = DistributionCallback()
    callback_list = CallbackList([dist_callback, callback_max_episodes, checkpoint_callback])

    logger.info(pformat([
        'n_episodes: {}'.format(n_episodes),
        'read_path: {}'.format(read_path),
        'n_files: {}'.format(n_files),
        'n_sequences: {}'.format(n_sequences),
        'n_features: {}'.format(n_features),
        'total_timesteps: {}'.format(dataset.shape[0] -1),
        'device: {}'.format(device),
        'tensorboard_dir: {}'.format(tensorboard_dir),
        'checkpoint_dir: {}'.format(checkpoint_dir)
    ]))

    agent.learn(
        total_timesteps=int(10e7),
        log_interval=1000, 
        progress_bar=True,
        callback=callback_list
    )
    



In [7]:
class Argument:
    read_path = 'torch_ready_data'
    n_files = 2
    n_sequences = 10
    n_features = 18
    n_episodes = 10
    device = 'gpu'

In [8]:

main(Argument)

04/16/2023 11:20:30 AM Starting experiment at 2023-04-16-11-20
04/16/2023 11:20:30 AM Extracting dataset from npz files to tensor
04/16/2023 11:20:30 AM Loading pointer to dataset: torch_ready_data/files_used_2/sequence_index_0: derived from torch_ready_data/files_used_2/sequence_index_0.npz
04/16/2023 11:20:30 AM Loading pointer to dataset: torch_ready_data/files_used_2/sequence_index_10: derived from torch_ready_data/files_used_2/sequence_index_10.npz
04/16/2023 11:20:30 AM Loading: torch_ready_data/files_used_2/sequence_index_0/arr_0.npy
04/16/2023 11:20:30 AM Loading: torch_ready_data/files_used_2/sequence_index_10/arr_0.npy
04/16/2023 11:20:30 AM Train size: 0:7000, eval size: 7000:8500: test size: 8500:10000
04/16/2023 11:20:30 AM Dataset shape: (10000, 210): generating metadata tensor
04/16/2023 11:20:30 AM Generating metadata tasks per session
04/16/2023 11:20:30 AM Metadata train: (159, 8)
04/16/2023 11:20:30 AM Creating vectorized training environment: num envs: 8
04/16/2023 

Using cuda device


04/16/2023 11:20:36 AM ['n_episodes: 10',
 'read_path: torch_ready_data',
 'n_files: 2',
 'n_sequences: 10',
 'n_features: 18',
 'total_timesteps: 9999',
 'device: gpu',
 'tensorboard_dir: '
 's3://dissertation-data-dmiller/reinforcement_learning_incentives/n_files_2/results/2023-04-16-11-20/training_metrics',
 'checkpoint_dir: '
 's3://dissertation-data-dmiller/reinforcement_learning_incentives/n_files_2/results/2023-04-16-11-20/checkpoints']
04/16/2023 11:20:36 AM Found credentials in environment variables.


Logging to s3://dissertation-data-dmiller/reinforcement_learning_incentives/n_files_2/results/2023-04-16-11-20/training_metrics/A2C_1


Output()

-------------------------------------
| time/                 |           |
|    fps                | 63        |
|    iterations         | 50        |
|    time_elapsed       | 7         |
|    total_timesteps    | 500       |
| train/                |           |
|    entropy_loss       | -0.691    |
|    explained_variance | -5.36e-06 |
|    learning_rate      | 0.0007    |
|    n_updates          | 49        |
|    policy_loss        | 506       |
|    value_loss         | 6.21e+05  |
-------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 68        |
|    iterations         | 100       |
|    time_elapsed       | 14        |
|    total_timesteps    | 1000      |
| train/                |           |
|    entropy_loss       | -0.692    |
|    explained_variance | -7.75e-06 |
|    learning_rate      | 0.0007    |
|    n_updates          | 99        |
|    policy_loss        | 1.04e+03  |
|    value_loss         | 2.57e+06  |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 150      |
|    time_elapsed       | 20       |
|    total_timesteps    | 1500     |
| train/                |          |
|    entropy_loss       | -0.667   |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 149      |
|    policy_loss        | 1.31e+03 |
|    value_loss         | 5.86e+06 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 70       |
|    iterations         | 200      |
|    time_elapsed       | 28       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.318   |
|    explained_variance | 1.19e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 199      |
|    policy_loss        | 299      |
|    value_loss         | 1.05e+07 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 250      |
|    time_elapsed       | 34       |
|    total_timesteps    | 2500     |
| train/                |          |
|    entropy_loss       | -0.467   |
|    explained_variance | 1.79e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 249      |
|    policy_loss        | 1.85e+03 |
|    value_loss         | 1.64e+07 |
------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 71        |
|    iterations         | 300       |
|    time_elapsed       | 41        |
|    total_timesteps    | 3000      |
| train/                |           |
|    entropy_loss       | -0.358    |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 299       |
|    policy_loss        | 1.14e+03  |
|    value_loss         | 2.37e+07  |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 73       |
|    iterations         | 350      |
|    time_elapsed       | 47       |
|    total_timesteps    | 3500     |
| train/                |          |
|    entropy_loss       | -0.318   |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 349      |
|    policy_loss        | 1.3e+03  |
|    value_loss         | 3.23e+07 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 400      |
|    time_elapsed       | 55       |
|    total_timesteps    | 4000     |
| train/                |          |
|    entropy_loss       | -0.393   |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 399      |
|    policy_loss        | 3.18e+03 |
|    value_loss         | 4.22e+07 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 450      |
|    time_elapsed       | 61       |
|    total_timesteps    | 4500     |
| train/                |          |
|    entropy_loss       | -0.347   |
|    explained_variance | 0.000227 |
|    learning_rate      | 0.0007   |
|    n_updates          | 449      |
|    policy_loss        | 849      |
|    value_loss         | 5.34e+07 |
------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 72        |
|    iterations         | 500       |
|    time_elapsed       | 69        |
|    total_timesteps    | 5000      |
| train/                |           |
|    entropy_loss       | -0.382    |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 499       |
|    policy_loss        | 3.82e+03  |
|    value_loss         | 6.6e+07   |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 550      |
|    time_elapsed       | 75       |
|    total_timesteps    | 5500     |
| train/                |          |
|    entropy_loss       | -0.67    |
|    explained_variance | 1.19e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 549      |
|    policy_loss        | 5.69e+03 |
|    value_loss         | 7.99e+07 |
------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 72        |
|    iterations         | 600       |
|    time_elapsed       | 83        |
|    total_timesteps    | 6000      |
| train/                |           |
|    entropy_loss       | -0.337    |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 599       |
|    policy_loss        | 1.01e+03  |
|    value_loss         | 9.51e+07  |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 650      |
|    time_elapsed       | 89       |
|    total_timesteps    | 6500     |
| train/                |          |
|    entropy_loss       | -0.371   |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 649      |
|    policy_loss        | 3.85e+03 |
|    value_loss         | 1.12e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 71       |
|    iterations         | 700      |
|    time_elapsed       | 97       |
|    total_timesteps    | 7000     |
| train/                |          |
|    entropy_loss       | -0.243   |
|    explained_variance | 5.96e-08 |
|    learning_rate      | 0.0007   |
|    n_updates          | 699      |
|    policy_loss        | 703      |
|    value_loss         | 1.29e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 72       |
|    iterations         | 750      |
|    time_elapsed       | 103      |
|    total_timesteps    | 7500     |
| train/                |          |
|    entropy_loss       | -0.28    |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 749      |
|    policy_loss        | 1.01e+04 |
|    value_loss         | 1.49e+08 |
------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 71        |
|    iterations         | 800       |
|    time_elapsed       | 111       |
|    total_timesteps    | 8000      |
| train/                |           |
|    entropy_loss       | -0.604    |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 799       |
|    policy_loss        | 5.94e+03  |
|    value_loss         | 1.69e+08  |
-------------------------------------


-------------------------------------
| time/                 |           |
|    fps                | 72        |
|    iterations         | 850       |
|    time_elapsed       | 117       |
|    total_timesteps    | 8500      |
| train/                |           |
|    entropy_loss       | -0.179    |
|    explained_variance | -1.19e-07 |
|    learning_rate      | 0.0007    |
|    n_updates          | 849       |
|    policy_loss        | 6.95e+03  |
|    value_loss         | 1.91e+08  |
-------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 71       |
|    iterations         | 900      |
|    time_elapsed       | 126      |
|    total_timesteps    | 9000     |
| train/                |          |
|    entropy_loss       | -0.266   |
|    explained_variance | 7.15e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 899      |
|    policy_loss        | 1.64e+03 |
|    value_loss         | 2.14e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 71       |
|    iterations         | 950      |
|    time_elapsed       | 132      |
|    total_timesteps    | 9500     |
| train/                |          |
|    entropy_loss       | -0.0899  |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 949      |
|    policy_loss        | 259      |
|    value_loss         | 2.39e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 71       |
|    iterations         | 1000     |
|    time_elapsed       | 140      |
|    total_timesteps    | 10000    |
| train/                |          |
|    entropy_loss       | -0.113   |
|    explained_variance | 2.38e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 999      |
|    policy_loss        | 389      |
|    value_loss         | 2.65e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 71       |
|    iterations         | 1050     |
|    time_elapsed       | 147      |
|    total_timesteps    | 10500    |
| train/                |          |
|    entropy_loss       | -0.0779  |
|    explained_variance | 7.15e-07 |
|    learning_rate      | 0.0007   |
|    n_updates          | 1049     |
|    policy_loss        | 201      |
|    value_loss         | 2.92e+08 |
------------------------------------


------------------------------------
| time/                 |          |
|    fps                | 70       |
|    iterations         | 1100     |
|    time_elapsed       | 155      |
|    total_timesteps    | 11000    |
| train/                |          |
|    entropy_loss       | -0.0438  |
|    explained_variance | 0        |
|    learning_rate      | 0.0007   |
|    n_updates          | 1099     |
|    policy_loss        | 121      |
|    value_loss         | 3.2e+08  |
------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 71         |
|    iterations         | 1150       |
|    time_elapsed       | 161        |
|    total_timesteps    | 11500      |
| train/                |            |
|    entropy_loss       | -0.197     |
|    explained_variance | -1e-05     |
|    learning_rate      | 0.0007     |
|    n_updates          | 1149       |
|    policy_loss        | 9.12       |
|    value_loss         | 9.76e+03   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1200       |
|    time_elapsed       | 169        |
|    total_timesteps    | 12000      |
| train/                |            |
|    entropy_loss       | -0.0227    |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1199       |
|    policy_loss        | 2.89       |
|    value_loss         | 8.35e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1250       |
|    time_elapsed       | 176        |
|    total_timesteps    | 12500      |
| train/                |            |
|    entropy_loss       | -0.0176    |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1249       |
|    policy_loss        | 3.96       |
|    value_loss         | 2.99e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1300       |
|    time_elapsed       | 184        |
|    total_timesteps    | 13000      |
| train/                |            |
|    entropy_loss       | -0.0468    |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1299       |
|    policy_loss        | 19.5       |
|    value_loss         | 6.46e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1350       |
|    time_elapsed       | 190        |
|    total_timesteps    | 13500      |
| train/                |            |
|    entropy_loss       | -0.0881    |
|    explained_variance | -5.96e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 1349       |
|    policy_loss        | 123        |
|    value_loss         | 1.13e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1400       |
|    time_elapsed       | 198        |
|    total_timesteps    | 14000      |
| train/                |            |
|    entropy_loss       | -0.0107    |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1399       |
|    policy_loss        | 5.38       |
|    value_loss         | 1.74e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1450       |
|    time_elapsed       | 205        |
|    total_timesteps    | 14500      |
| train/                |            |
|    entropy_loss       | -0.0136    |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1449       |
|    policy_loss        | 9.05       |
|    value_loss         | 2.48e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1500       |
|    time_elapsed       | 213        |
|    total_timesteps    | 15000      |
| train/                |            |
|    entropy_loss       | -0.00644   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1499       |
|    policy_loss        | 4.16       |
|    value_loss         | 3.36e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1550       |
|    time_elapsed       | 219        |
|    total_timesteps    | 15500      |
| train/                |            |
|    entropy_loss       | -0.00413   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 1549       |
|    policy_loss        | 2.86       |
|    value_loss         | 4.37e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1600       |
|    time_elapsed       | 227        |
|    total_timesteps    | 16000      |
| train/                |            |
|    entropy_loss       | -0.00261   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 1599       |
|    policy_loss        | 1.92       |
|    value_loss         | 5.52e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1650       |
|    time_elapsed       | 234        |
|    total_timesteps    | 16500      |
| train/                |            |
|    entropy_loss       | -0.00426   |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 1649       |
|    policy_loss        | 3.69       |
|    value_loss         | 6.79e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1700       |
|    time_elapsed       | 242        |
|    total_timesteps    | 17000      |
| train/                |            |
|    entropy_loss       | -0.00325   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 1699       |
|    policy_loss        | 2.99       |
|    value_loss         | 8.2e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 70         |
|    iterations         | 1750       |
|    time_elapsed       | 249        |
|    total_timesteps    | 17500      |
| train/                |            |
|    entropy_loss       | -0.00238   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1749       |
|    policy_loss        | 2.3        |
|    value_loss         | 9.73e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 1800       |
|    time_elapsed       | 258        |
|    total_timesteps    | 18000      |
| train/                |            |
|    entropy_loss       | -0.00335   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1799       |
|    policy_loss        | 3.7        |
|    value_loss         | 1.14e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 1850       |
|    time_elapsed       | 265        |
|    total_timesteps    | 18500      |
| train/                |            |
|    entropy_loss       | -0.00234   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1849       |
|    policy_loss        | 2.63       |
|    value_loss         | 1.32e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 1900       |
|    time_elapsed       | 273        |
|    total_timesteps    | 19000      |
| train/                |            |
|    entropy_loss       | -0.00148   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 1899       |
|    policy_loss        | 1.68       |
|    value_loss         | 1.52e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 1950       |
|    time_elapsed       | 280        |
|    total_timesteps    | 19500      |
| train/                |            |
|    entropy_loss       | -0.0301    |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 1949       |
|    policy_loss        | 53.8       |
|    value_loss         | 1.72e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 2000       |
|    time_elapsed       | 288        |
|    total_timesteps    | 20000      |
| train/                |            |
|    entropy_loss       | -0.00133   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 1999       |
|    policy_loss        | 1.71       |
|    value_loss         | 1.94e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 2050       |
|    time_elapsed       | 295        |
|    total_timesteps    | 20500      |
| train/                |            |
|    entropy_loss       | -0.000888  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2049       |
|    policy_loss        | 1.16       |
|    value_loss         | 2.18e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 2100       |
|    time_elapsed       | 304        |
|    total_timesteps    | 21000      |
| train/                |            |
|    entropy_loss       | -0.00071   |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2099       |
|    policy_loss        | 0.942      |
|    value_loss         | 2.42e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 69         |
|    iterations         | 2150       |
|    time_elapsed       | 311        |
|    total_timesteps    | 21500      |
| train/                |            |
|    entropy_loss       | -0.00249   |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2149       |
|    policy_loss        | 4.08       |
|    value_loss         | 2.69e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2200       |
|    time_elapsed       | 319        |
|    total_timesteps    | 22000      |
| train/                |            |
|    entropy_loss       | -0.00205   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2199       |
|    policy_loss        | 3.39       |
|    value_loss         | 2.96e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16278435.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2250       |
|    time_elapsed       | 326        |
|    total_timesteps    | 22500      |
| train/                |            |
|    entropy_loss       | -0.0016    |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2249       |
|    policy_loss        | 2.7        |
|    value_loss         | 3.25e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2300       |
|    time_elapsed       | 335        |
|    total_timesteps    | 23000      |
| train/                |            |
|    entropy_loss       | -0.00126   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2299       |
|    policy_loss        | 0.0301     |
|    value_loss         | 6.87e+04   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2350       |
|    time_elapsed       | 341        |
|    total_timesteps    | 23500      |
| train/                |            |
|    entropy_loss       | -0.00122   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2349       |
|    policy_loss        | 0.124      |
|    value_loss         | 1.16e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2400       |
|    time_elapsed       | 350        |
|    total_timesteps    | 24000      |
| train/                |            |
|    entropy_loss       | -0.000794  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2399       |
|    policy_loss        | 0.129      |
|    value_loss         | 3.57e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2450       |
|    time_elapsed       | 356        |
|    total_timesteps    | 24500      |
| train/                |            |
|    entropy_loss       | -0.000671  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2449       |
|    policy_loss        | 0.155      |
|    value_loss         | 7.32e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2500       |
|    time_elapsed       | 365        |
|    total_timesteps    | 25000      |
| train/                |            |
|    entropy_loss       | -0.000541  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2499       |
|    policy_loss        | 0.158      |
|    value_loss         | 1.24e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2550       |
|    time_elapsed       | 371        |
|    total_timesteps    | 25500      |
| train/                |            |
|    entropy_loss       | -0.000446  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2549       |
|    policy_loss        | 0.157      |
|    value_loss         | 1.88e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2600       |
|    time_elapsed       | 380        |
|    total_timesteps    | 26000      |
| train/                |            |
|    entropy_loss       | -0.000372  |
|    explained_variance | 2.38e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2599       |
|    policy_loss        | 0.154      |
|    value_loss         | 2.65e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2650       |
|    time_elapsed       | 387        |
|    total_timesteps    | 26500      |
| train/                |            |
|    entropy_loss       | -0.000314  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2649       |
|    policy_loss        | 0.147      |
|    value_loss         | 3.56e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2700       |
|    time_elapsed       | 395        |
|    total_timesteps    | 27000      |
| train/                |            |
|    entropy_loss       | -0.000247  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2699       |
|    policy_loss        | 0.129      |
|    value_loss         | 4.6e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2750       |
|    time_elapsed       | 402        |
|    total_timesteps    | 27500      |
| train/                |            |
|    entropy_loss       | -0.00023   |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2749       |
|    policy_loss        | 0.133      |
|    value_loss         | 5.77e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2800       |
|    time_elapsed       | 410        |
|    total_timesteps    | 28000      |
| train/                |            |
|    entropy_loss       | -0.000173  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 2799       |
|    policy_loss        | 0.109      |
|    value_loss         | 7.07e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2850       |
|    time_elapsed       | 418        |
|    total_timesteps    | 28500      |
| train/                |            |
|    entropy_loss       | -0.00112   |
|    explained_variance | 2.38e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2849       |
|    policy_loss        | 0.464      |
|    value_loss         | 8.51e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 68         |
|    iterations         | 2900       |
|    time_elapsed       | 426        |
|    total_timesteps    | 29000      |
| train/                |            |
|    entropy_loss       | -0.000973  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2899       |
|    policy_loss        | 0.729      |
|    value_loss         | 1.01e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 2950       |
|    time_elapsed       | 434        |
|    total_timesteps    | 29500      |
| train/                |            |
|    entropy_loss       | -0.000134  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 2949       |
|    policy_loss        | 0.107      |
|    value_loss         | 1.18e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3000       |
|    time_elapsed       | 442        |
|    total_timesteps    | 30000      |
| train/                |            |
|    entropy_loss       | -0.00228   |
|    explained_variance | 3.58e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 2999       |
|    policy_loss        | 2.21       |
|    value_loss         | 1.36e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3050       |
|    time_elapsed       | 449        |
|    total_timesteps    | 30500      |
| train/                |            |
|    entropy_loss       | -5.19e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3049       |
|    policy_loss        | 0.0432     |
|    value_loss         | 1.56e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3100       |
|    time_elapsed       | 458        |
|    total_timesteps    | 31000      |
| train/                |            |
|    entropy_loss       | -4.94e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 3099       |
|    policy_loss        | 0.0444     |
|    value_loss         | 1.77e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3150       |
|    time_elapsed       | 465        |
|    total_timesteps    | 31500      |
| train/                |            |
|    entropy_loss       | -3.58e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3149       |
|    policy_loss        | 0.0305     |
|    value_loss         | 1.99e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3200       |
|    time_elapsed       | 473        |
|    total_timesteps    | 32000      |
| train/                |            |
|    entropy_loss       | -3.04e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3199       |
|    policy_loss        | 0.0258     |
|    value_loss         | 2.23e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3250       |
|    time_elapsed       | 480        |
|    total_timesteps    | 32500      |
| train/                |            |
|    entropy_loss       | -2.56e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3249       |
|    policy_loss        | 0.0272     |
|    value_loss         | 2.48e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3300       |
|    time_elapsed       | 489        |
|    total_timesteps    | 33000      |
| train/                |            |
|    entropy_loss       | -2.26e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3299       |
|    policy_loss        | 0.0215     |
|    value_loss         | 2.74e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3350       |
|    time_elapsed       | 496        |
|    total_timesteps    | 33500      |
| train/                |            |
|    entropy_loss       | -2.01e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 3349       |
|    policy_loss        | 0.0225     |
|    value_loss         | 3.02e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.71e+03   |
|    ep_rew_mean        | 16239520.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3400       |
|    time_elapsed       | 504        |
|    total_timesteps    | 34000      |
| train/                |            |
|    entropy_loss       | -1.87e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 3399       |
|    policy_loss        | 0.0236     |
|    value_loss         | 3.31e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3450       |
|    time_elapsed       | 512        |
|    total_timesteps    | 34500      |
| train/                |            |
|    entropy_loss       | -1.69e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3449       |
|    policy_loss        | 0.000395   |
|    value_loss         | 2.08e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3500       |
|    time_elapsed       | 520        |
|    total_timesteps    | 35000      |
| train/                |            |
|    entropy_loss       | -1.53e-05  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 3499       |
|    policy_loss        | 0.0011     |
|    value_loss         | 1.62e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3550       |
|    time_elapsed       | 527        |
|    total_timesteps    | 35500      |
| train/                |            |
|    entropy_loss       | -0.000114  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 3549       |
|    policy_loss        | 0.0312     |
|    value_loss         | 4.36e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3600       |
|    time_elapsed       | 535        |
|    total_timesteps    | 36000      |
| train/                |            |
|    entropy_loss       | -2.01e-05  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 3599       |
|    policy_loss        | 0.00425    |
|    value_loss         | 8.41e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3650       |
|    time_elapsed       | 542        |
|    total_timesteps    | 36500      |
| train/                |            |
|    entropy_loss       | -1.31e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3649       |
|    policy_loss        | 0.00321    |
|    value_loss         | 1.38e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3700       |
|    time_elapsed       | 551        |
|    total_timesteps    | 37000      |
| train/                |            |
|    entropy_loss       | -1.36e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3699       |
|    policy_loss        | 0.00392    |
|    value_loss         | 2.06e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 67         |
|    iterations         | 3750       |
|    time_elapsed       | 558        |
|    total_timesteps    | 37500      |
| train/                |            |
|    entropy_loss       | -1.09e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3749       |
|    policy_loss        | 0.00231    |
|    value_loss         | 2.86e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 3800       |
|    time_elapsed       | 567        |
|    total_timesteps    | 38000      |
| train/                |            |
|    entropy_loss       | -1.01e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 3799       |
|    policy_loss        | 0.00267    |
|    value_loss         | 3.8e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 3850       |
|    time_elapsed       | 574        |
|    total_timesteps    | 38500      |
| train/                |            |
|    entropy_loss       | -1.03e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3849       |
|    policy_loss        | 0.00352    |
|    value_loss         | 4.88e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 3900       |
|    time_elapsed       | 583        |
|    total_timesteps    | 39000      |
| train/                |            |
|    entropy_loss       | -0.000703  |
|    explained_variance | 4.77e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 3899       |
|    policy_loss        | 0.383      |
|    value_loss         | 6.08e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 3950       |
|    time_elapsed       | 591        |
|    total_timesteps    | 39500      |
| train/                |            |
|    entropy_loss       | -9.67e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 3949       |
|    policy_loss        | 0.00434    |
|    value_loss         | 7.42e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4000       |
|    time_elapsed       | 600        |
|    total_timesteps    | 40000      |
| train/                |            |
|    entropy_loss       | -8.44e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 3999       |
|    policy_loss        | 0.00407    |
|    value_loss         | 8.88e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4050       |
|    time_elapsed       | 607        |
|    total_timesteps    | 40500      |
| train/                |            |
|    entropy_loss       | -8.11e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4049       |
|    policy_loss        | 0.00443    |
|    value_loss         | 1.05e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4100       |
|    time_elapsed       | 616        |
|    total_timesteps    | 41000      |
| train/                |            |
|    entropy_loss       | -7.46e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4099       |
|    policy_loss        | 0.00477    |
|    value_loss         | 1.22e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4150       |
|    time_elapsed       | 623        |
|    total_timesteps    | 41500      |
| train/                |            |
|    entropy_loss       | -1.18e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4149       |
|    policy_loss        | 0.00769    |
|    value_loss         | 1.41e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4200       |
|    time_elapsed       | 632        |
|    total_timesteps    | 42000      |
| train/                |            |
|    entropy_loss       | -5.32e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4199       |
|    policy_loss        | 0.0816     |
|    value_loss         | 1.6e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4250       |
|    time_elapsed       | 639        |
|    total_timesteps    | 42500      |
| train/                |            |
|    entropy_loss       | -5.25e-05  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4249       |
|    policy_loss        | 0.0702     |
|    value_loss         | 1.82e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4300       |
|    time_elapsed       | 647        |
|    total_timesteps    | 43000      |
| train/                |            |
|    entropy_loss       | -4.61e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4299       |
|    policy_loss        | 0.0792     |
|    value_loss         | 2.05e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4350       |
|    time_elapsed       | 654        |
|    total_timesteps    | 43500      |
| train/                |            |
|    entropy_loss       | -6.16e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4349       |
|    policy_loss        | 0.00653    |
|    value_loss         | 2.28e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4400       |
|    time_elapsed       | 663        |
|    total_timesteps    | 44000      |
| train/                |            |
|    entropy_loss       | -6.8e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4399       |
|    policy_loss        | 0.00689    |
|    value_loss         | 2.54e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4450       |
|    time_elapsed       | 671        |
|    total_timesteps    | 44500      |
| train/                |            |
|    entropy_loss       | -5.8e-06   |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4449       |
|    policy_loss        | 0.00724    |
|    value_loss         | 2.8e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4500       |
|    time_elapsed       | 680        |
|    total_timesteps    | 45000      |
| train/                |            |
|    entropy_loss       | -5.72e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4499       |
|    policy_loss        | 0.00759    |
|    value_loss         | 3.08e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16211441.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4550       |
|    time_elapsed       | 687        |
|    total_timesteps    | 45500      |
| train/                |            |
|    entropy_loss       | -5.69e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4549       |
|    policy_loss        | 0.00794    |
|    value_loss         | 3.38e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4600       |
|    time_elapsed       | 695        |
|    total_timesteps    | 46000      |
| train/                |            |
|    entropy_loss       | -5.27e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4599       |
|    policy_loss        | 0.000282   |
|    value_loss         | 4.23e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4650       |
|    time_elapsed       | 703        |
|    total_timesteps    | 46500      |
| train/                |            |
|    entropy_loss       | -5.07e-06  |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4649       |
|    policy_loss        | 0.000634   |
|    value_loss         | 2.15e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 4700       |
|    time_elapsed       | 712        |
|    total_timesteps    | 47000      |
| train/                |            |
|    entropy_loss       | -5e-06     |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4699       |
|    policy_loss        | 0.000952   |
|    value_loss         | 5.2e+06    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 66         |
|    iterations         | 4750       |
|    time_elapsed       | 719        |
|    total_timesteps    | 47500      |
| train/                |            |
|    entropy_loss       | -3.53e-05  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 4749       |
|    policy_loss        | 0.00311    |
|    value_loss         | 9.57e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 4800       |
|    time_elapsed       | 730        |
|    total_timesteps    | 48000      |
| train/                |            |
|    entropy_loss       | -4.22e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4799       |
|    policy_loss        | -0         |
|    value_loss         | 1.53e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 4850       |
|    time_elapsed       | 738        |
|    total_timesteps    | 48500      |
| train/                |            |
|    entropy_loss       | -4.14e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 4849       |
|    policy_loss        | -0         |
|    value_loss         | 2.23e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 4900       |
|    time_elapsed       | 752        |
|    total_timesteps    | 49000      |
| train/                |            |
|    entropy_loss       | -4.31e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4899       |
|    policy_loss        | 0.000715   |
|    value_loss         | 3.06e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 4950       |
|    time_elapsed       | 759        |
|    total_timesteps    | 49500      |
| train/                |            |
|    entropy_loss       | -3.98e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4949       |
|    policy_loss        | -0         |
|    value_loss         | 4.03e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 5000       |
|    time_elapsed       | 768        |
|    total_timesteps    | 50000      |
| train/                |            |
|    entropy_loss       | -3.92e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 4999       |
|    policy_loss        | -0         |
|    value_loss         | 5.13e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 65         |
|    iterations         | 5050       |
|    time_elapsed       | 776        |
|    total_timesteps    | 50500      |
| train/                |            |
|    entropy_loss       | -3.96e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5049       |
|    policy_loss        | -0         |
|    value_loss         | 6.36e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5100       |
|    time_elapsed       | 785        |
|    total_timesteps    | 51000      |
| train/                |            |
|    entropy_loss       | -2.92e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5099       |
|    policy_loss        | 0.017      |
|    value_loss         | 7.73e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5150       |
|    time_elapsed       | 792        |
|    total_timesteps    | 51500      |
| train/                |            |
|    entropy_loss       | -3.7e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5149       |
|    policy_loss        | -0         |
|    value_loss         | 9.23e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5200       |
|    time_elapsed       | 802        |
|    total_timesteps    | 52000      |
| train/                |            |
|    entropy_loss       | -2.87e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5199       |
|    policy_loss        | 0.0333     |
|    value_loss         | 1.09e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5250       |
|    time_elapsed       | 810        |
|    total_timesteps    | 52500      |
| train/                |            |
|    entropy_loss       | -0.000283  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5249       |
|    policy_loss        | 0.389      |
|    value_loss         | 1.26e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5300       |
|    time_elapsed       | 819        |
|    total_timesteps    | 53000      |
| train/                |            |
|    entropy_loss       | -5.84e-06  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5299       |
|    policy_loss        | 0.0026     |
|    value_loss         | 1.45e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5350       |
|    time_elapsed       | 827        |
|    total_timesteps    | 53500      |
| train/                |            |
|    entropy_loss       | -2.62e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5349       |
|    policy_loss        | 0.0221     |
|    value_loss         | 1.65e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5400       |
|    time_elapsed       | 835        |
|    total_timesteps    | 54000      |
| train/                |            |
|    entropy_loss       | -3.22e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5399       |
|    policy_loss        | -0         |
|    value_loss         | 1.87e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5450       |
|    time_elapsed       | 843        |
|    total_timesteps    | 54500      |
| train/                |            |
|    entropy_loss       | -3.19e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5449       |
|    policy_loss        | -0         |
|    value_loss         | 2.1e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5500       |
|    time_elapsed       | 852        |
|    total_timesteps    | 55000      |
| train/                |            |
|    entropy_loss       | -3.8e-06   |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 5499       |
|    policy_loss        | 0.00198    |
|    value_loss         | 2.34e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5550       |
|    time_elapsed       | 860        |
|    total_timesteps    | 55500      |
| train/                |            |
|    entropy_loss       | -3.06e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5549       |
|    policy_loss        | -0         |
|    value_loss         | 2.6e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5600       |
|    time_elapsed       | 869        |
|    total_timesteps    | 56000      |
| train/                |            |
|    entropy_loss       | -5.3e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5599       |
|    policy_loss        | 0.00366    |
|    value_loss         | 2.87e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16197724.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5650       |
|    time_elapsed       | 876        |
|    total_timesteps    | 56500      |
| train/                |            |
|    entropy_loss       | -2.97e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5649       |
|    policy_loss        | -0         |
|    value_loss         | 3.15e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5700       |
|    time_elapsed       | 885        |
|    total_timesteps    | 57000      |
| train/                |            |
|    entropy_loss       | -3.3e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5699       |
|    policy_loss        | -0         |
|    value_loss         | 827        |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5750       |
|    time_elapsed       | 893        |
|    total_timesteps    | 57500      |
| train/                |            |
|    entropy_loss       | -2.99e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5749       |
|    policy_loss        | -0         |
|    value_loss         | 7.06e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5800       |
|    time_elapsed       | 902        |
|    total_timesteps    | 58000      |
| train/                |            |
|    entropy_loss       | -3.1e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5799       |
|    policy_loss        | -0         |
|    value_loss         | 2.73e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5850       |
|    time_elapsed       | 909        |
|    total_timesteps    | 58500      |
| train/                |            |
|    entropy_loss       | -2.78e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 5849       |
|    policy_loss        | -0         |
|    value_loss         | 6.09e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5900       |
|    time_elapsed       | 918        |
|    total_timesteps    | 59000      |
| train/                |            |
|    entropy_loss       | -2.7e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5899       |
|    policy_loss        | -0         |
|    value_loss         | 1.08e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 5950       |
|    time_elapsed       | 926        |
|    total_timesteps    | 59500      |
| train/                |            |
|    entropy_loss       | -2.73e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5949       |
|    policy_loss        | -0         |
|    value_loss         | 1.68e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 6000       |
|    time_elapsed       | 935        |
|    total_timesteps    | 60000      |
| train/                |            |
|    entropy_loss       | -2.26e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 5999       |
|    policy_loss        | 0.0074     |
|    value_loss         | 2.41e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 6050       |
|    time_elapsed       | 944        |
|    total_timesteps    | 60500      |
| train/                |            |
|    entropy_loss       | -2.59e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 6049       |
|    policy_loss        | -0         |
|    value_loss         | 3.28e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 6100       |
|    time_elapsed       | 953        |
|    total_timesteps    | 61000      |
| train/                |            |
|    entropy_loss       | -2.69e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6099       |
|    policy_loss        | -0         |
|    value_loss         | 4.28e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 64         |
|    iterations         | 6150       |
|    time_elapsed       | 960        |
|    total_timesteps    | 61500      |
| train/                |            |
|    entropy_loss       | -4.53e-06  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6149       |
|    policy_loss        | 0.00159    |
|    value_loss         | 5.41e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6200       |
|    time_elapsed       | 969        |
|    total_timesteps    | 62000      |
| train/                |            |
|    entropy_loss       | -2.07e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6199       |
|    policy_loss        | 0.0037     |
|    value_loss         | 6.67e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6250       |
|    time_elapsed       | 977        |
|    total_timesteps    | 62500      |
| train/                |            |
|    entropy_loss       | -2.45e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6249       |
|    policy_loss        | -0         |
|    value_loss         | 8.07e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6300       |
|    time_elapsed       | 986        |
|    total_timesteps    | 63000      |
| train/                |            |
|    entropy_loss       | -2.44e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 6299       |
|    policy_loss        | -0         |
|    value_loss         | 9.61e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6350       |
|    time_elapsed       | 993        |
|    total_timesteps    | 63500      |
| train/                |            |
|    entropy_loss       | -2.07e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6349       |
|    policy_loss        | 0.00496    |
|    value_loss         | 1.13e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6400       |
|    time_elapsed       | 1004       |
|    total_timesteps    | 64000      |
| train/                |            |
|    entropy_loss       | -3.73e-05  |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 6399       |
|    policy_loss        | 0.0294     |
|    value_loss         | 1.31e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6450       |
|    time_elapsed       | 1011       |
|    total_timesteps    | 64500      |
| train/                |            |
|    entropy_loss       | -4.05e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6449       |
|    policy_loss        | 0.00264    |
|    value_loss         | 1.5e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6500       |
|    time_elapsed       | 1020       |
|    total_timesteps    | 65000      |
| train/                |            |
|    entropy_loss       | -4e-06     |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 6499       |
|    policy_loss        | 0.00282    |
|    value_loss         | 1.7e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6550       |
|    time_elapsed       | 1028       |
|    total_timesteps    | 65500      |
| train/                |            |
|    entropy_loss       | -3.59e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6549       |
|    policy_loss        | 0.0343     |
|    value_loss         | 1.92e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6600       |
|    time_elapsed       | 1037       |
|    total_timesteps    | 66000      |
| train/                |            |
|    entropy_loss       | -2.21e-06  |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 6599       |
|    policy_loss        | -0         |
|    value_loss         | 2.16e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6650       |
|    time_elapsed       | 1044       |
|    total_timesteps    | 66500      |
| train/                |            |
|    entropy_loss       | -2.2e-06   |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6649       |
|    policy_loss        | -0         |
|    value_loss         | 2.4e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6700       |
|    time_elapsed       | 1053       |
|    total_timesteps    | 67000      |
| train/                |            |
|    entropy_loss       | -1.96e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6699       |
|    policy_loss        | 0.0336     |
|    value_loss         | 2.66e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6750       |
|    time_elapsed       | 1061       |
|    total_timesteps    | 67500      |
| train/                |            |
|    entropy_loss       | -1.87e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6749       |
|    policy_loss        | 0.0159     |
|    value_loss         | 2.94e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16187939.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6800       |
|    time_elapsed       | 1070       |
|    total_timesteps    | 68000      |
| train/                |            |
|    entropy_loss       | -2.13e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6799       |
|    policy_loss        | -0         |
|    value_loss         | 3.22e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6850       |
|    time_elapsed       | 1078       |
|    total_timesteps    | 68500      |
| train/                |            |
|    entropy_loss       | -2.11e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6849       |
|    policy_loss        | -0         |
|    value_loss         | 5.04e+04   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6900       |
|    time_elapsed       | 1087       |
|    total_timesteps    | 69000      |
| train/                |            |
|    entropy_loss       | -1.78e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6899       |
|    policy_loss        | 0.00155    |
|    value_loss         | 1.08e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 6950       |
|    time_elapsed       | 1094       |
|    total_timesteps    | 69500      |
| train/                |            |
|    entropy_loss       | -3.54e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 6949       |
|    policy_loss        | 0.0004     |
|    value_loss         | 3.44e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7000       |
|    time_elapsed       | 1104       |
|    total_timesteps    | 70000      |
| train/                |            |
|    entropy_loss       | -2.04e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 6999       |
|    policy_loss        | -0         |
|    value_loss         | 7.13e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7050       |
|    time_elapsed       | 1112       |
|    total_timesteps    | 70500      |
| train/                |            |
|    entropy_loss       | -2.01e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7049       |
|    policy_loss        | -0         |
|    value_loss         | 1.21e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7100       |
|    time_elapsed       | 1121       |
|    total_timesteps    | 71000      |
| train/                |            |
|    entropy_loss       | -1.5e-05   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7099       |
|    policy_loss        | 0.00513    |
|    value_loss         | 1.85e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7150       |
|    time_elapsed       | 1128       |
|    total_timesteps    | 71500      |
| train/                |            |
|    entropy_loss       | -3.17e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7149       |
|    policy_loss        | -0         |
|    value_loss         | 2.61e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7200       |
|    time_elapsed       | 1137       |
|    total_timesteps    | 72000      |
| train/                |            |
|    entropy_loss       | -2.26e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7199       |
|    policy_loss        | -0         |
|    value_loss         | 3.51e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7250       |
|    time_elapsed       | 1145       |
|    total_timesteps    | 72500      |
| train/                |            |
|    entropy_loss       | -1.92e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7249       |
|    policy_loss        | -0         |
|    value_loss         | 4.55e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7300       |
|    time_elapsed       | 1154       |
|    total_timesteps    | 73000      |
| train/                |            |
|    entropy_loss       | -3.14e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7299       |
|    policy_loss        | -0         |
|    value_loss         | 5.71e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7350       |
|    time_elapsed       | 1162       |
|    total_timesteps    | 73500      |
| train/                |            |
|    entropy_loss       | -1.89e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7349       |
|    policy_loss        | -0         |
|    value_loss         | 7.01e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7400       |
|    time_elapsed       | 1170       |
|    total_timesteps    | 74000      |
| train/                |            |
|    entropy_loss       | -1.89e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7399       |
|    policy_loss        | -0         |
|    value_loss         | 8.45e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7450       |
|    time_elapsed       | 1178       |
|    total_timesteps    | 74500      |
| train/                |            |
|    entropy_loss       | -1.48e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7449       |
|    policy_loss        | 0.0149     |
|    value_loss         | 1e+08      |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 63         |
|    iterations         | 7500       |
|    time_elapsed       | 1187       |
|    total_timesteps    | 75000      |
| train/                |            |
|    entropy_loss       | -2.99e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7499       |
|    policy_loss        | 0.018      |
|    value_loss         | 1.17e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7550       |
|    time_elapsed       | 1199       |
|    total_timesteps    | 75500      |
| train/                |            |
|    entropy_loss       | -3.1e-06   |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7549       |
|    policy_loss        | -0         |
|    value_loss         | 1.35e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7600       |
|    time_elapsed       | 1210       |
|    total_timesteps    | 76000      |
| train/                |            |
|    entropy_loss       | -1.82e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7599       |
|    policy_loss        | -0         |
|    value_loss         | 1.55e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7650       |
|    time_elapsed       | 1218       |
|    total_timesteps    | 76500      |
| train/                |            |
|    entropy_loss       | -1.8e-06   |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7649       |
|    policy_loss        | -0         |
|    value_loss         | 1.76e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7700       |
|    time_elapsed       | 1228       |
|    total_timesteps    | 77000      |
| train/                |            |
|    entropy_loss       | -1.79e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7699       |
|    policy_loss        | -0         |
|    value_loss         | 1.98e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7750       |
|    time_elapsed       | 1236       |
|    total_timesteps    | 77500      |
| train/                |            |
|    entropy_loss       | -1.53e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7749       |
|    policy_loss        | 0.0232     |
|    value_loss         | 2.22e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7800       |
|    time_elapsed       | 1245       |
|    total_timesteps    | 78000      |
| train/                |            |
|    entropy_loss       | -1.74e-06  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 7799       |
|    policy_loss        | -0         |
|    value_loss         | 2.46e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7850       |
|    time_elapsed       | 1252       |
|    total_timesteps    | 78500      |
| train/                |            |
|    entropy_loss       | -1.73e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7849       |
|    policy_loss        | -0         |
|    value_loss         | 2.73e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7900       |
|    time_elapsed       | 1261       |
|    total_timesteps    | 79000      |
| train/                |            |
|    entropy_loss       | -1.47e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7899       |
|    policy_loss        | 0.0258     |
|    value_loss         | 3e+08      |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.7e+03    |
|    ep_rew_mean        | 16180964.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 7950       |
|    time_elapsed       | 1269       |
|    total_timesteps    | 79500      |
| train/                |            |
|    entropy_loss       | -3.7e-06   |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 7949       |
|    policy_loss        | 0.00392    |
|    value_loss         | 3.29e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8000       |
|    time_elapsed       | 1277       |
|    total_timesteps    | 80000      |
| train/                |            |
|    entropy_loss       | -2.76e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 7999       |
|    policy_loss        | -0         |
|    value_loss         | 1.73e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8050       |
|    time_elapsed       | 1286       |
|    total_timesteps    | 80500      |
| train/                |            |
|    entropy_loss       | -1.66e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8049       |
|    policy_loss        | -0         |
|    value_loss         | 1.51e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8100       |
|    time_elapsed       | 1295       |
|    total_timesteps    | 81000      |
| train/                |            |
|    entropy_loss       | -1.64e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 8099       |
|    policy_loss        | -0         |
|    value_loss         | 4.19e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8150       |
|    time_elapsed       | 1303       |
|    total_timesteps    | 81500      |
| train/                |            |
|    entropy_loss       | -1.64e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8149       |
|    policy_loss        | -0         |
|    value_loss         | 8.2e+06    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8200       |
|    time_elapsed       | 1313       |
|    total_timesteps    | 82000      |
| train/                |            |
|    entropy_loss       | -1.61e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8199       |
|    policy_loss        | -0         |
|    value_loss         | 1.35e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8250       |
|    time_elapsed       | 1320       |
|    total_timesteps    | 82500      |
| train/                |            |
|    entropy_loss       | -1.61e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8249       |
|    policy_loss        | -0         |
|    value_loss         | 2.02e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8300       |
|    time_elapsed       | 1330       |
|    total_timesteps    | 83000      |
| train/                |            |
|    entropy_loss       | -1.6e-06   |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8299       |
|    policy_loss        | -0         |
|    value_loss         | 2.82e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8350       |
|    time_elapsed       | 1342       |
|    total_timesteps    | 83500      |
| train/                |            |
|    entropy_loss       | -1.58e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8349       |
|    policy_loss        | -0         |
|    value_loss         | 3.75e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8400       |
|    time_elapsed       | 1353       |
|    total_timesteps    | 84000      |
| train/                |            |
|    entropy_loss       | -1.56e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8399       |
|    policy_loss        | -0         |
|    value_loss         | 4.81e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8450       |
|    time_elapsed       | 1360       |
|    total_timesteps    | 84500      |
| train/                |            |
|    entropy_loss       | -1.54e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8449       |
|    policy_loss        | -0         |
|    value_loss         | 6e+07      |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8500       |
|    time_elapsed       | 1370       |
|    total_timesteps    | 85000      |
| train/                |            |
|    entropy_loss       | -2.54e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8499       |
|    policy_loss        | -0         |
|    value_loss         | 7.34e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8550       |
|    time_elapsed       | 1377       |
|    total_timesteps    | 85500      |
| train/                |            |
|    entropy_loss       | -2.64e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8549       |
|    policy_loss        | -0         |
|    value_loss         | 8.8e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8600       |
|    time_elapsed       | 1387       |
|    total_timesteps    | 86000      |
| train/                |            |
|    entropy_loss       | -1.51e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8599       |
|    policy_loss        | -0         |
|    value_loss         | 1.04e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 62         |
|    iterations         | 8650       |
|    time_elapsed       | 1394       |
|    total_timesteps    | 86500      |
| train/                |            |
|    entropy_loss       | -1.51e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8649       |
|    policy_loss        | -0         |
|    value_loss         | 1.21e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8700       |
|    time_elapsed       | 1403       |
|    total_timesteps    | 87000      |
| train/                |            |
|    entropy_loss       | -1.65e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8699       |
|    policy_loss        | -0         |
|    value_loss         | 1.4e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8750       |
|    time_elapsed       | 1411       |
|    total_timesteps    | 87500      |
| train/                |            |
|    entropy_loss       | -1.48e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8749       |
|    policy_loss        | -0         |
|    value_loss         | 1.6e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8800       |
|    time_elapsed       | 1420       |
|    total_timesteps    | 88000      |
| train/                |            |
|    entropy_loss       | -2.05e-05  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8799       |
|    policy_loss        | 0.0258     |
|    value_loss         | 1.81e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8850       |
|    time_elapsed       | 1429       |
|    total_timesteps    | 88500      |
| train/                |            |
|    entropy_loss       | -1.45e-06  |
|    explained_variance | 2.38e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 8849       |
|    policy_loss        | -0         |
|    value_loss         | 2.04e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8900       |
|    time_elapsed       | 1438       |
|    total_timesteps    | 89000      |
| train/                |            |
|    entropy_loss       | -1.44e-06  |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 8899       |
|    policy_loss        | -0         |
|    value_loss         | 2.27e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 8950       |
|    time_elapsed       | 1446       |
|    total_timesteps    | 89500      |
| train/                |            |
|    entropy_loss       | -1.05e-05  |
|    explained_variance | -2.38e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 8949       |
|    policy_loss        | 0.0067     |
|    value_loss         | 2.53e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9000       |
|    time_elapsed       | 1455       |
|    total_timesteps    | 90000      |
| train/                |            |
|    entropy_loss       | -1.42e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 8999       |
|    policy_loss        | -0         |
|    value_loss         | 2.79e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9050       |
|    time_elapsed       | 1462       |
|    total_timesteps    | 90500      |
| train/                |            |
|    entropy_loss       | -1.46e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9049       |
|    policy_loss        | -0         |
|    value_loss         | 3.07e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16177944.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9100       |
|    time_elapsed       | 1473       |
|    total_timesteps    | 91000      |
| train/                |            |
|    entropy_loss       | -2.33e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9099       |
|    policy_loss        | -0         |
|    value_loss         | 3.36e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9150       |
|    time_elapsed       | 1481       |
|    total_timesteps    | 91500      |
| train/                |            |
|    entropy_loss       | -2.73e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 9149       |
|    policy_loss        | -0         |
|    value_loss         | 3.74e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9200       |
|    time_elapsed       | 1490       |
|    total_timesteps    | 92000      |
| train/                |            |
|    entropy_loss       | -2.3e-06   |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 9199       |
|    policy_loss        | 0.000102   |
|    value_loss         | 2.03e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9250       |
|    time_elapsed       | 1498       |
|    total_timesteps    | 92500      |
| train/                |            |
|    entropy_loss       | -2.23e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9249       |
|    policy_loss        | -0         |
|    value_loss         | 5.03e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9300       |
|    time_elapsed       | 1510       |
|    total_timesteps    | 93000      |
| train/                |            |
|    entropy_loss       | -1.51e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9299       |
|    policy_loss        | -0         |
|    value_loss         | 9.34e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9350       |
|    time_elapsed       | 1518       |
|    total_timesteps    | 93500      |
| train/                |            |
|    entropy_loss       | -1.34e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9349       |
|    policy_loss        | -0         |
|    value_loss         | 1.5e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9400       |
|    time_elapsed       | 1528       |
|    total_timesteps    | 94000      |
| train/                |            |
|    entropy_loss       | -1.33e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9399       |
|    policy_loss        | -0         |
|    value_loss         | 2.2e+07    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9450       |
|    time_elapsed       | 1536       |
|    total_timesteps    | 94500      |
| train/                |            |
|    entropy_loss       | -1.32e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 9449       |
|    policy_loss        | -0         |
|    value_loss         | 3.03e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9500       |
|    time_elapsed       | 1546       |
|    total_timesteps    | 95000      |
| train/                |            |
|    entropy_loss       | -1.54e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 9499       |
|    policy_loss        | -0         |
|    value_loss         | 3.99e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9550       |
|    time_elapsed       | 1554       |
|    total_timesteps    | 95500      |
| train/                |            |
|    entropy_loss       | -1.31e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9549       |
|    policy_loss        | -0         |
|    value_loss         | 5.08e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9600       |
|    time_elapsed       | 1565       |
|    total_timesteps    | 96000      |
| train/                |            |
|    entropy_loss       | -1.34e-06  |
|    explained_variance | 2.38e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 9599       |
|    policy_loss        | -0         |
|    value_loss         | 6.31e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9650       |
|    time_elapsed       | 1573       |
|    total_timesteps    | 96500      |
| train/                |            |
|    entropy_loss       | -1.29e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 9649       |
|    policy_loss        | -0         |
|    value_loss         | 7.67e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9700       |
|    time_elapsed       | 1582       |
|    total_timesteps    | 97000      |
| train/                |            |
|    entropy_loss       | -9.44e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9699       |
|    policy_loss        | 0.00182    |
|    value_loss         | 9.15e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9750       |
|    time_elapsed       | 1591       |
|    total_timesteps    | 97500      |
| train/                |            |
|    entropy_loss       | -1.26e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 9749       |
|    policy_loss        | -0         |
|    value_loss         | 1.08e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9800       |
|    time_elapsed       | 1600       |
|    total_timesteps    | 98000      |
| train/                |            |
|    entropy_loss       | -1.26e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9799       |
|    policy_loss        | -0         |
|    value_loss         | 1.25e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9850       |
|    time_elapsed       | 1608       |
|    total_timesteps    | 98500      |
| train/                |            |
|    entropy_loss       | -1.24e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 9849       |
|    policy_loss        | -0         |
|    value_loss         | 1.44e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 61         |
|    iterations         | 9900       |
|    time_elapsed       | 1617       |
|    total_timesteps    | 99000      |
| train/                |            |
|    entropy_loss       | -9.07e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 9899       |
|    policy_loss        | 0.00244    |
|    value_loss         | 1.64e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 9950       |
|    time_elapsed       | 1634       |
|    total_timesteps    | 99500      |
| train/                |            |
|    entropy_loss       | -1.23e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9949       |
|    policy_loss        | -0         |
|    value_loss         | 1.86e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10000      |
|    time_elapsed       | 1648       |
|    total_timesteps    | 100000     |
| train/                |            |
|    entropy_loss       | -1.44e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 9999       |
|    policy_loss        | -0         |
|    value_loss         | 2.09e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10050      |
|    time_elapsed       | 1656       |
|    total_timesteps    | 100500     |
| train/                |            |
|    entropy_loss       | -9.32e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10049      |
|    policy_loss        | 0.0029     |
|    value_loss         | 2.33e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10100      |
|    time_elapsed       | 1666       |
|    total_timesteps    | 101000     |
| train/                |            |
|    entropy_loss       | -1.24e-06  |
|    explained_variance | 1.79e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 10099      |
|    policy_loss        | -0         |
|    value_loss         | 2.59e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10150      |
|    time_elapsed       | 1674       |
|    total_timesteps    | 101500     |
| train/                |            |
|    entropy_loss       | -1.21e-06  |
|    explained_variance | 5.96e-08   |
|    learning_rate      | 0.0007     |
|    n_updates          | 10149      |
|    policy_loss        | -0         |
|    value_loss         | 2.86e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16175356.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10200      |
|    time_elapsed       | 1684       |
|    total_timesteps    | 102000     |
| train/                |            |
|    entropy_loss       | -1.2e-06   |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10199      |
|    policy_loss        | -0         |
|    value_loss         | 3.14e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10250      |
|    time_elapsed       | 1692       |
|    total_timesteps    | 102500     |
| train/                |            |
|    entropy_loss       | -1.74e-05  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10249      |
|    policy_loss        | -1.2e-05   |
|    value_loss         | 1.71e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10300      |
|    time_elapsed       | 1702       |
|    total_timesteps    | 103000     |
| train/                |            |
|    entropy_loss       | -1.2e-06   |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 10299      |
|    policy_loss        | -0         |
|    value_loss         | 6.51e+05   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10350      |
|    time_elapsed       | 1711       |
|    total_timesteps    | 103500     |
| train/                |            |
|    entropy_loss       | -1.2e-06   |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 10349      |
|    policy_loss        | -0         |
|    value_loss         | 2.63e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10400      |
|    time_elapsed       | 1722       |
|    total_timesteps    | 104000     |
| train/                |            |
|    entropy_loss       | -1.18e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10399      |
|    policy_loss        | -0         |
|    value_loss         | 5.95e+06   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10450      |
|    time_elapsed       | 1731       |
|    total_timesteps    | 104500     |
| train/                |            |
|    entropy_loss       | -1.17e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 10449      |
|    policy_loss        | -0         |
|    value_loss         | 1.06e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10500      |
|    time_elapsed       | 1741       |
|    total_timesteps    | 105000     |
| train/                |            |
|    entropy_loss       | -1.91e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 10499      |
|    policy_loss        | -0         |
|    value_loss         | 1.65e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10550      |
|    time_elapsed       | 1749       |
|    total_timesteps    | 105500     |
| train/                |            |
|    entropy_loss       | -1.17e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10549      |
|    policy_loss        | -0         |
|    value_loss         | 2.38e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10600      |
|    time_elapsed       | 1759       |
|    total_timesteps    | 106000     |
| train/                |            |
|    entropy_loss       | -1.23e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 10599      |
|    policy_loss        | -0         |
|    value_loss         | 3.25e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10650      |
|    time_elapsed       | 1767       |
|    total_timesteps    | 106500     |
| train/                |            |
|    entropy_loss       | -1.15e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 10649      |
|    policy_loss        | -0         |
|    value_loss         | 4.24e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10700      |
|    time_elapsed       | 1778       |
|    total_timesteps    | 107000     |
| train/                |            |
|    entropy_loss       | -1.14e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10699      |
|    policy_loss        | -0         |
|    value_loss         | 5.37e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10750      |
|    time_elapsed       | 1786       |
|    total_timesteps    | 107500     |
| train/                |            |
|    entropy_loss       | -1.25e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10749      |
|    policy_loss        | -0         |
|    value_loss         | 6.63e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 60         |
|    iterations         | 10800      |
|    time_elapsed       | 1796       |
|    total_timesteps    | 108000     |
| train/                |            |
|    entropy_loss       | -8.89e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 10799      |
|    policy_loss        | 0.0017     |
|    value_loss         | 8.02e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 10850      |
|    time_elapsed       | 1811       |
|    total_timesteps    | 108500     |
| train/                |            |
|    entropy_loss       | -1.13e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 10849      |
|    policy_loss        | -0         |
|    value_loss         | 9.55e+07   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 10900      |
|    time_elapsed       | 1822       |
|    total_timesteps    | 109000     |
| train/                |            |
|    entropy_loss       | -1.13e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10899      |
|    policy_loss        | -0         |
|    value_loss         | 1.12e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 10950      |
|    time_elapsed       | 1831       |
|    total_timesteps    | 109500     |
| train/                |            |
|    entropy_loss       | -1.12e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10949      |
|    policy_loss        | -0         |
|    value_loss         | 1.3e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11000      |
|    time_elapsed       | 1842       |
|    total_timesteps    | 110000     |
| train/                |            |
|    entropy_loss       | -1.85e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 10999      |
|    policy_loss        | -0         |
|    value_loss         | 1.49e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11050      |
|    time_elapsed       | 1850       |
|    total_timesteps    | 110500     |
| train/                |            |
|    entropy_loss       | -1.85e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 11049      |
|    policy_loss        | -0         |
|    value_loss         | 1.7e+08    |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11100      |
|    time_elapsed       | 1861       |
|    total_timesteps    | 111000     |
| train/                |            |
|    entropy_loss       | -1.16e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 11099      |
|    policy_loss        | -0         |
|    value_loss         | 1.92e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11150      |
|    time_elapsed       | 1869       |
|    total_timesteps    | 111500     |
| train/                |            |
|    entropy_loss       | -2.83e-06  |
|    explained_variance | -1.19e-07  |
|    learning_rate      | 0.0007     |
|    n_updates          | 11149      |
|    policy_loss        | 0.00105    |
|    value_loss         | 2.15e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11200      |
|    time_elapsed       | 1881       |
|    total_timesteps    | 112000     |
| train/                |            |
|    entropy_loss       | -1.07e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 11199      |
|    policy_loss        | -0         |
|    value_loss         | 2.39e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11250      |
|    time_elapsed       | 1890       |
|    total_timesteps    | 112500     |
| train/                |            |
|    entropy_loss       | -1.66e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 11249      |
|    policy_loss        | 0.00117    |
|    value_loss         | 2.65e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11300      |
|    time_elapsed       | 1900       |
|    total_timesteps    | 113000     |
| train/                |            |
|    entropy_loss       | -1.13e-06  |
|    explained_variance | 0          |
|    learning_rate      | 0.0007     |
|    n_updates          | 11299      |
|    policy_loss        | -0         |
|    value_loss         | 2.92e+08   |
--------------------------------------


--------------------------------------
| rollout/              |            |
|    ep_len_mean        | 5.69e+03   |
|    ep_rew_mean        | 16172860.0 |
| time/                 |            |
|    fps                | 59         |
|    iterations         | 11350      |
|    time_elapsed       | 1908       |
|    total_timesteps    | 113500     |
| train/                |            |
|    entropy_loss       | -1.06e-06  |
|    explained_variance | 1.19e-07   |
|    learning_rate      | 0.0007     |
|    n_updates          | 11349      |
|    policy_loss        | -0         |
|    value_loss         | 3.21e+08   |
--------------------------------------
