In [2]:
!pip install python-dotenv --quiet
!pip install gym stable-baselines3[extra] awscli boto3 pqdm --quiet


[0m

In [3]:
%load_ext dotenv
%dotenv env

In [9]:
# %load rl_constant.py
# %load rl_constant.py
FEATURE_COLS = [

    "country_count",
    "date_hour_cos",
    "date_hour_sin",
    "date_minute_cos",
    "date_minute_sin",
    
    "session_30_count",
    "session_5_count",
    "cum_session_event",
    "convolved_delta_event",
    "cum_session_time",
    
    "expanding_click_average",
    "cum_platform_time",
    "cum_platform_event",
    "cum_projects",
    "average_event_time",
    
    "rolling_session_time",
    "rolling_session_events",
    "rolling_session_gap",
    "previous_session_time",
    "previous_session_events",
]

METADATA_COLS = [
    
    "user_id",
    "date_time",
    "session_30_count_raw",
    "cum_session_event_raw",
    "cum_session_time_raw",
    "reward",
    "session_minutes",
    "session_size",
    "sim_minutes",
    "sim_size",
]

RL_STAT_COLS = [
    'session_size',
    'session_minutes',
    'sim_size',
    'sim_minutes',

]

PREDICTION_COLS = [
    "seq_40",
    "label"
]

LOAD_COLS = list(set(FEATURE_COLS + METADATA_COLS + RL_STAT_COLS + PREDICTION_COLS))

In [10]:
# %load environment
import gym
import numpy as np
from scipy.stats import norm

import numpy as np
from scipy.stats import norm 
import gym
from datetime import datetime
from copy import deepcopy

class CitizenScienceEnv(gym.Env):
    
    metadata = {'render.modes': ['human']}
    
    def __init__(self, dataset, out_features, n_sequences, evaluation=False):
        """
        trajectories: dictionary of user_id to their respective trajectories.
        n_sequences: number of sequences used for preprocessing.
        n_features: number of features used for preprocessing.
        """
        super(CitizenScienceEnv, self).__init__()
        self.dataset = dataset
        self.n_sequences = n_sequences
        self.current_session = None
        self.current_session_index = 0
        self.reward = 0
        self.n_sequences = n_sequences
        self.out_features = out_features
        
        self.action_space = gym.spaces.Discrete(2)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=(len(out_features), n_sequences + 1), dtype=np.float32)
        self.evalution = evaluation
        self.episode_bins = []
        self.exp_runs = 0

    def reset(self):
        user_to_run, session_to_run = self.dataset.sample(1)[['user_id', 'session_30_count_raw']].values[0]
        self.current_session = self._get_events(user_to_run, session_to_run)
        self.metadata = self._metadata()
        self.current_session_index = 0
        self.reward = 0
        return self._state()
    
    def _row_to_dict(self, metadata):
        """
        Convert a row of metadata to a dictionary.
        """
        return metadata.to_dict()

    def step(self, action):
        
        self._take_action(action)
        next_state, done, meta = self._calculate_next_state()
        
        if done:
            current_session_index = self.current_session_index if \
                self.current_session_index != self.current_session.shape[0] else self.current_session.shape[0] - 1
            
            self.exp_runs += 1
        
            self.metadata['ended'] = self.current_session.iloc[current_session_index]['cum_session_event_raw']
            self.metadata['reward'] = self.current_session.iloc[current_session_index]['reward']
            self.metadata['session_exp_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            self.metadata['exp_runs'] = self.exp_runs
            self.episode_bins.append(self._row_to_dict(self.metadata))
            
            return next_state, float(self.reward), done, {}
        else:
            self.reward = self.current_session.iloc[self.current_session_index]['reward'] 
            self.current_session_index += 1        
            return next_state, float(self.reward), done, meta
    
    def _metadata(self):
        session_metadata = self.current_session.iloc[0][RL_STAT_COLS]
        session_metadata['ended'] = 0
        session_metadata['incentive_index'] = 0
        return session_metadata
    
    
    def flush_episode_bins(self):
        episode_bins = self.episode_bins.copy()
        self.episode_bins = []
        return episode_bins
    
    def _calculate_next_state(self):
        
        if (self.current_session_index == self.current_session.shape[0]):
            return None, True, {}

        if self._continuing_in_session():
            return self._state(), False, {}
    
        return None, True, {}
         
    def _continuing_in_session(self):
        sim_minutes = self.current_session.iloc[self.current_session_index]['sim_minutes']
        current_session_minutes = self.current_session.iloc[self.current_session_index]['cum_session_time_raw']
        if current_session_minutes < sim_minutes:
            return True
        
        extending_session = self._probability_extending_session(current_session_minutes)
        
        return all([extending_session >= .3, extending_session <= .7])
        
    
    def _probability_extending_session(self, current_session_count):
        if self.metadata['incentive_index'] == 0:
            return 0
        
        scale = max(5, int(self.metadata['session_minutes'] / 5))
        continue_session = norm(
            loc=self.metadata['incentive_time'],
            scale=scale
        ).cdf(current_session_count)
        
        return continue_session
        

    def _get_events(self, user_id, session):
        subset = self.dataset[
            (self.dataset['user_id'] == user_id) &
            (self.dataset['session_30_count_raw'] == session).copy()
        ]

        subset = subset.sort_values(by=['date_time'])
        return subset
    
    def _take_action(self, action):
        if action == 0 or self.metadata['incentive_index'] > 0:
            return
        
        current_session_index = self.current_session_index if \
            self.current_session_index != self.current_session.shape[0] else self.current_session.shape[0] - 1
        
        self.metadata['incentive_index'] = self.current_session.iloc[current_session_index]['cum_session_event_raw']
        self.metadata['incentive_time'] = self.current_session.iloc[current_session_index]['cum_session_time_raw']
        
    def _state(self):

        if self.current_session_index > self.n_sequences:
            events = self.current_session.iloc[self.current_session_index - (self.n_sequences + 1):self.current_session_index][self.out_features].values
            
        else:
            delta = min((self.n_sequences + 1)- self.current_session_index, self.n_sequences)
            zero_cat = np.zeros((delta, len(self.out_features)))
            events = self.current_session.iloc[:max(self.current_session_index, 1)][self.out_features].values
            events = np.concatenate((zero_cat, events), axis=0)
            

        return events.astype(np.float32).T

In [11]:
# %load callback
# %load callback
import pandas as pd
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.logger import TensorBoardOutputFormat
from datetime import datetime

class DistributionCallback(BaseCallback):
    
    @classmethod
    def tensorboard_setup(cls, log_dir, log_freq):
        cls._log_dir = log_dir
        cls._log_freq = log_freq

    
    def _on_step(self) -> bool:
        if self.n_calls % self._log_freq == 0:
            dist_list = self.training_env.env_method('flush_episode_bins')
            values_to_log = [item for sublist in dist_list for item in sublist if len(sublist) > 0]

            values_df = pd.DataFrame(
                values_to_log
            )
            
            
            session_size, sim_size, session_minutes, sim_minutes, ended, reward, inc_time, inc_index = (
                values_df['session_size'].mean(),
                values_df['sim_size'].mean(),
                values_df['session_minutes'].mean(),
                values_df['sim_minutes'].mean(),
                values_df['ended'].mean(),
                values_df['reward'].mean(),
                values_df['incentive_time'].mean(),
                values_df['incentive_index'].mean()
            )
            
            size_stats = {
                'session_size': session_size,
                'sim_size': sim_size,
                'ended': ended,
                'inc_index': inc_index
            }
            
            
            time_stats = {
                'session_minutes': session_minutes,
                'sim_minutes': sim_minutes,
                'reward': reward,
                'inc_time': inc_time   
            }
            
            for key, value in size_stats.items():
                self.logger.record(f'size/{key}', value)
            
            for key, value in time_stats.items():
                self.logger.record(f'time/{key}', value)

            
        return True

In [12]:
# %load policies/cnn_policy
# %load policies/cnn_policy
from typing import Dict, List, Type, Union

import gym
import torch
from gym import spaces
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.dqn.policies import DQNPolicy
from torch import nn


class CustomConv1dFeatures(BaseFeaturesExtractor):
    
    @classmethod
    def setup_sequences_features(cls, n_sequences, n_features):
        cls.n_sequences = n_sequences
        cls.n_features = n_features
        
    
    def __init__(self, observation_space: spaces.Box, features_dim=20):
        super().__init__(observation_space, features_dim)
        
        
        self.cnn_1 = nn.Sequential(
            nn.Conv1d(self.n_features, self.n_features*2, kernel_size=3, padding=1),
            nn.BatchNorm1d(self.n_features*2),
            nn.ReLU(),
            
            nn.Conv1d(self.n_features*2, self.n_features*2, kernel_size=3, padding=1),
            nn.BatchNorm1d(self.n_features*2),
            nn.ReLU(),
            
            nn.Conv1d(self.n_features*2, self.n_features*2, kernel_size=3, padding=1),
            nn.BatchNorm1d(self.n_features*2),
            nn.Conv1d(self.n_features*2, self.n_features*2, kernel_size=3, padding=1),
            
            nn.AvgPool1d(2)
        )
        
        self.cnn_2 = nn.Sequential(
            nn.Conv1d(self.n_features*2, self.n_features, kernel_size=3, padding=1),
            nn.BatchNorm1d(self.n_features),
            nn.ReLU(),
            
            nn.Conv1d(self.n_features, self.n_features, kernel_size=3, padding=1),
            nn.BatchNorm1d(self.n_features),
            nn.ReLU()
        )
        
        self.act = nn.Sequential(
            nn.MaxPool1d(2),
            nn.Flatten(),
        )
        
        with torch.no_grad():
            out_shape = self.act(self.cnn_2(self.cnn_1(torch.zeros((1, self.n_features, self.n_sequences))))).shape[1]
            self.linear = nn.Linear(out_shape, features_dim)
    
    def forward(self, obs):
        out = self.cnn_1(obs)
        out = self.cnn_2(out)
        out = self.act(out)
        return self.linear(out)


        

In [13]:
# %load rl_util
import os
import logging
global logger

logger = logging.getLogger(__name__)
import numpy as np
from pqdm.processes import pqdm
import pdb

def download_dataset_from_s3(client, base_read_path, full_read_path):
    logger.info(f'Downloading data from {base_read_path}')
    os.makedirs(base_read_path, exist_ok=True)
    
    logger.info(f'Downloading data from dissertation-data-dmiller/{full_read_path}')
    client.download_file(
        'dissertation-data-dmiller',
        full_read_path,
        full_read_path
    )
    logger.info(f'Downloaded data from dissertation-data-dmiller/{full_read_path}')
    

def _parralel_partition_users(unique_sessions, df, index, vec_df_path):
    subset_session = df.merge(unique_sessions, on=['user_id', 'session_30_count_raw'], how='inner')
    subset_session.to_parquet(f'{vec_df_path}/batch_{index}.parquet')
    return subset_session.copy().reset_index(drop=True)
    

def batch_environments_for_vectorization(df, n_envs, vec_df_path):
   
    df[['user_id', 'session_30_count_raw']] = df[['user_id', 'session_30_count_raw']].astype(int)
   
    unique_sessions = df[['user_id', 'session_30_count_raw']].drop_duplicates().sample(frac=1).reset_index(drop=True)
    logger.info(f'Unique sessions shape: {unique_sessions.shape}. Splitting into {n_envs} environments')
    unique_session_split = np.array_split(unique_sessions, n_envs)
    
    unique_session_args = [{
        'unique_sessions': sess,
        'df': df,
        'index': i,
        'vec_df_path': vec_df_path,
    } for i, sess in enumerate(unique_session_split)]

    logger.info(f'Environments split: running parralel partitioning')
    result = pqdm(unique_session_args, _parralel_partition_users, n_jobs=os.cpu_count(), argument_type='kwargs')
    logger.info(f'Environments split: finished parralel partitioning')
    return result
        
    

In [14]:
# %load incentive_reinforcement_learning_cpu.py
import argparse
import logging
import os
from datetime import datetime
from functools import reduce
from pprint import pformat
from typing import Callable
import boto3
import random
import numpy as np
import pandas as pd
import torch


from stable_baselines3 import A2C, DQN, PPO
from stable_baselines3.common.callbacks import (CallbackList,
                                                CheckpointCallback,
                                                StopTrainingOnMaxEpisodes)
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor
from stable_baselines3.dqn.policies import DQNPolicy


logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p', level=logging.INFO)
np.set_printoptions(precision=4, linewidth=200, suppress=True)
torch.set_printoptions(precision=2, linewidth=200, sci_mode=False)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.max_rows', 500)
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
global logger
logger = logging.getLogger('rl_exp_train')
logger.setLevel(logging.INFO)

S3_BASELINE_PATH = 's3://dissertation-data-dmiller/'
N_SEQUENCES = 40
CHECKPOINT_FREQ = 250_000
TB_LOG = 10_000
WINDOW = 2

def parse_args():
    parse = argparse.ArgumentParser()
    parse.add_argument('--read_path', type=str, default='rl_ready_data_conv')
    parse.add_argument('--n_files', type=int, default=2)
    parse.add_argument('--n_episodes', type=int, default=50)
    parse.add_argument('--n_envs', type=int, default=100)
    parse.add_argument('--lstm', type=str, default='label')
    parse.add_argument('--part', type=str, default='train')
    parse.add_argument('--feature_extractor', type=str, default='cnn') 
    args = parse.parse_args()
    return args


def load_and_dedupe(read_path, cols):
    
    df = pd.read_parquet(read_path, columns=cols)
    return df

def main(args):
    
    
    logger.info('Starting Incentive Reinforcement Learning')
    logger.info(pformat(args.__dict__))
    exec_time = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    
    read_path, n_files, n_episodes, n_envs, lstm, part, feature_ext = (
        args.read_path, 
        args.n_files, 
        args.n_episodes, 
        args.n_envs,
        args.lstm,
        args.part,
        args.feature_extractor,
    )

    base_read_path = os.path.join('rl_ready_data_conv', f'files_used_{n_files}')
    full_read_path = os.path.join(base_read_path, f'window_{WINDOW}_{part}.parquet')
    vec_df_path = os.path.join(base_read_path, f'window_{WINDOW}_{part}_batched')
    load_cols = FEATURE_COLS + METADATA_COLS + PREDICTION_COLS

    base_exp_path = S3_BASELINE_PATH +  '/'.join(
        [
            'rl_experiments',
            f'n_files_{n_files}',
            f'{feature_ext}_{lstm}',
            exec_time,
        ]
    )

    if not os.path.exists(full_read_path):
        client = boto3.client('s3')
        download_dataset_from_s3(client,  base_read_path, full_read_path)
        
    if not os.path.exists(vec_df_path):
        df = load_and_dedupe(full_read_path, cols=load_cols)
        df = df.sort_values(['date_time', 'user_id'])
        logger.info(f'Loaded data with shape {df.shape}')
        os.makedirs(vec_df_path, exist_ok=True)
        logger.info(f'Writing vectorized data to {vec_df_path}')
        batch_environments_for_vectorization(df, n_envs, vec_df_path)
        logger.info(f'Vectorized environments created')
        del df

    logger.info(f'Loading vectorized data from {vec_df_path}')
    vectorized_df = [
        pd.read_parquet(os.path.join(vec_df_path, f'batch_{i}.parquet'))
        for i in range(n_envs)
    ]
    
    out_features = FEATURE_COLS + [lstm] if lstm else FEATURE_COLS


    citizen_science_vec =DummyVecEnv([lambda: CitizenScienceEnv(vec_df, out_features, N_SEQUENCES) for vec_df in vectorized_df])
    monitor_train = VecMonitor(citizen_science_vec)
    
    logger.info(f'Vectorized environments created')



    tensorboard_dir, checkpoint_dir = (
        os.path.join(base_exp_path, 'training_metrics'),
        os.path.join(base_exp_path, 'checkpoints')
    )

    if not os.path.exists(tensorboard_dir):
        logger.info(f'Creating directory {tensorboard_dir} for tensorboard logs')
        os.makedirs(tensorboard_dir)
   
    if not os.path.exists(checkpoint_dir):
        logger.info(f'Creating directory {checkpoint_dir} for checkpoints')
        os.makedirs(checkpoint_dir) 

    callback_max_episodes = StopTrainingOnMaxEpisodes(max_episodes=n_episodes, verbose=1)
    checkpoint_freq = int(CHECKPOINT_FREQ // n_envs)
    log_freq = int(TB_LOG // n_envs)
    checkpoint_callback = CheckpointCallback(
        save_freq=checkpoint_freq,
        save_path=checkpoint_dir, 
        verbose=2
    )
    
    DistributionCallback.tensorboard_setup(tensorboard_dir, log_freq)
    logger_callback = DistributionCallback()
    
    callback_list = CallbackList([checkpoint_callback, logger_callback, callback_max_episodes])
    
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    
    if feature_ext == 'cnn':
        CustomConv1dFeatures.setup_sequences_features(N_SEQUENCES + 1, len(out_features))
        logger.info('Using custom 1 dimensional CNN feature extractor')
        policy_kwargs = dict(
            features_extractor_class=CustomConv1dFeatures,
            net_arch=[10]
        )
        model = DQN(policy='CnnPolicy', env=monitor_train, verbose=1, tensorboard_log=tensorboard_dir, policy_kwargs=policy_kwargs, device=device, stats_window_size=1000)
    else:
        logger.info('Using default MLP feature extractor')
        model = DQN(policy='MlpPolicy', env=monitor_train, verbose=1, tensorboard_log=tensorboard_dir, device=device, stats_window_size=1000)
        
    logger.info(f'Model created: policy')
    
    logger.info(pformat(model.policy))
        
    logger.info(f'Beginning training') 
    
            
    logger.info(pformat([
        'n_episodes: {}'.format(n_episodes),
        'read_path: {}'.format(read_path),
        'n_files: {}'.format(n_files),
        'n_sequences: {}'.format(N_SEQUENCES),
        'n_envs: {}'.format(n_envs),
        'device: {}'.format(device),
        'lstm: {}'.format(lstm),
        'part: {}'.format(part),
        'feature_extractor: {}'.format(feature_ext),
        'tensorboard_dir: {}'.format(tensorboard_dir),
        'checkpoint_dir: {}'.format(checkpoint_dir),
        'checkpoint_freq: {}'.format(checkpoint_freq),
        'tb_freq: {}'.format(log_freq),
    ]))
    
    model.learn(total_timesteps=25_000_000, progress_bar=True, log_interval=log_freq, callback=callback_list)
    



In [15]:
class Argument:
    read_path = 'rl_ready_data_conv'
    n_files = 30
    n_episodes = 500_000
    n_envs = 100
    lstm = 'seq_40'
    part = 'train'
    feature_extractor = None

In [16]:

main(Argument)

06/02/2023 05:02:30 PM Starting Incentive Reinforcement Learning
06/02/2023 05:02:30 PM mappingproxy({'__dict__': <attribute '__dict__' of 'Argument' objects>,
              '__doc__': None,
              '__module__': '__main__',
              '__weakref__': <attribute '__weakref__' of 'Argument' objects>,
              'feature_extractor': None,
              'lstm': 'seq_40',
              'n_envs': 100,
              'n_episodes': 500000,
              'n_files': 30,
              'part': 'train',
              'read_path': 'rl_ready_data_conv'})
06/02/2023 05:02:30 PM Loading vectorized data from rl_ready_data_conv/files_used_30/window_2_train_batched
06/02/2023 05:02:31 PM Vectorized environments created
06/02/2023 05:02:31 PM Creating directory rl_experiments/n_files_30/None_seq_40/2023-06-02_17-02-30/training_metrics for tensorboard logs
06/02/2023 05:02:31 PM Creating directory rl_experiments/n_files_30/None_seq_40/2023-06-02_17-02-30/checkpoints for checkpoints
06/02/2023 05:

Using cuda device


06/02/2023 05:02:32 PM Model created: policy
06/02/2023 05:02:32 PM DQNPolicy(
  (q_net): QNetwork(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (q_net): Sequential(
      (0): Linear(in_features=861, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): ReLU()
      (4): Linear(in_features=64, out_features=2, bias=True)
    )
  )
  (q_net_target): QNetwork(
    (features_extractor): FlattenExtractor(
      (flatten): Flatten(start_dim=1, end_dim=-1)
    )
    (q_net): Sequential(
      (0): Linear(in_features=861, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
      (3): ReLU()
      (4): Linear(in_features=64, out_features=2, bias=True)
    )
  )
)
06/02/2023 05:02:32 PM Beginning training
06/02/2023 05:02:32 PM ['n_episodes: 500000',
 'read_path: rl_ready_data_conv',
 'n_files: 30',
 'n_sequences: 40'

Logging to rl_experiments/n_files_30/None_seq_40/2023-06-02_17-02-30/training_metrics/DQN_1


Output()

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 9.02     |
|    ep_rew_mean      | 4373.64  |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 100      |
|    fps              | 481      |
|    time_elapsed     | 4        |
|    total_timesteps  | 2200     |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 12.1      |
|    ep_rew_mean      | 12749.428 |
|    exploration_rate | 0.999     |
| time/               |           |
|    episodes         | 200       |
|    fps              | 605       |
|    time_elapsed     | 6         |
|    total_timesteps  | 3900      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 13.4      |
|    ep_rew_mean      | 11119.108 |
|    exploration_rate | 0.998     |
| time/               |           |
|    episodes         | 300       |
|    fps              | 674       |
|    time_elapsed     | 8         |
|    total_timesteps  | 5800      |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 14.9      |
|    ep_rew_mean      | 10505.407 |
|    exploration_rate | 0.997     |
| time/               |           |
|    episodes         | 400       |
|    fps              | 730       |
|    time_elapsed     | 10        |
|    total_timesteps  | 8000      |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 10132.38 |
|    exploration_rate | 0.996    |
| size/               |          |
|    ended            | 103      |
|    inc_index        | 21.7     |
|    session_size     | 20.9     |
|    sim_size         | 15       |
| time/               |          |
|    episodes         | 500      |
|    fps              | 743      |
|    inc_time         | 228      |
|    reward           | 2.1e+03  |
|    session_minutes  | 2.12e+03 |
|    sim_minutes      | 1.77e+03 |
|    time_elapsed     | 13       |
|    total_timesteps  | 10300    |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 16.9      |
|    ep_rew_mean      | 10344.601 |
|    exploration_rate | 0.995     |
| time/               |           |
|    episodes         | 600       |
|    fps              | 759       |
|    time_elapsed     | 16        |
|    total_timesteps  | 12400     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 17.5      |
|    ep_rew_mean      | 11137.973 |
|    exploration_rate | 0.995     |
| time/               |           |
|    episodes         | 700       |
|    fps              | 761       |
|    time_elapsed     | 18        |
|    total_timesteps  | 14200     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 17.7      |
|    ep_rew_mean      | 10821.175 |
|    exploration_rate | 0.994     |
| time/               |           |
|    episodes         | 800       |
|    fps              | 771       |
|    time_elapsed     | 21        |
|    total_timesteps  | 16300     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 17.9      |
|    ep_rew_mean      | 10990.493 |
|    exploration_rate | 0.993     |
| time/               |           |
|    episodes         | 900       |
|    fps              | 778       |
|    time_elapsed     | 23        |
|    total_timesteps  | 18400     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 18.2      |
|    ep_rew_mean      | 12731.402 |
|    exploration_rate | 0.992     |
| size/               |           |
|    ended            | 120       |
|    inc_index        | 16.8      |
|    session_size     | 29.2      |
|    sim_size         | 20.7      |
| time/               |           |
|    episodes         | 1000      |
|    fps              | 785       |
|    inc_time         | 479       |
|    reward           | 1.45e+03  |
|    session_minutes  | 1.47e+03  |
|    sim_minutes      | 1.25e+03  |
|    time_elapsed     | 25        |
|    total_timesteps  | 20200     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 19.2      |
|    ep_rew_mean      | 13331.803 |
|    exploration_rate | 0.992     |
| time/               |           |
|    episodes         | 1100      |
|    fps              | 792       |
|    time_elapsed     | 28        |
|    total_timesteps  | 22300     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 19.9      |
|    ep_rew_mean      | 11810.624 |
|    exploration_rate | 0.991     |
| time/               |           |
|    episodes         | 1200      |
|    fps              | 802       |
|    time_elapsed     | 30        |
|    total_timesteps  | 24500     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.3      |
|    ep_rew_mean      | 11931.449 |
|    exploration_rate | 0.99      |
| time/               |           |
|    episodes         | 1300      |
|    fps              | 809       |
|    time_elapsed     | 32        |
|    total_timesteps  | 26700     |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.4     |
|    ep_rew_mean      | 12471.94 |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 1400     |
|    fps              | 810      |
|    time_elapsed     | 35       |
|    total_timesteps  | 28900    |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.9      |
|    ep_rew_mean      | 12507.876 |
|    exploration_rate | 0.988     |
| size/               |           |
|    ended            | 124       |
|    inc_index        | 15.2      |
|    session_size     | 28.6      |
|    sim_size         | 20.3      |
| time/               |           |
|    episodes         | 1500      |
|    fps              | 810       |
|    inc_time         | 208       |
|    reward           | 1.43e+03  |
|    session_minutes  | 1.46e+03  |
|    sim_minutes      | 972       |
|    time_elapsed     | 37        |
|    total_timesteps  | 30700     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.5      |
|    ep_rew_mean      | 12897.886 |
|    exploration_rate | 0.988     |
| time/               |           |
|    episodes         | 1600      |
|    fps              | 815       |
|    time_elapsed     | 40        |
|    total_timesteps  | 32700     |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.2     |
|    ep_rew_mean      | 12187.32 |
|    exploration_rate | 0.987    |
| time/               |          |
|    episodes         | 1700     |
|    fps              | 821      |
|    time_elapsed     | 42       |
|    total_timesteps  | 34800    |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.5      |
|    ep_rew_mean      | 12397.438 |
|    exploration_rate | 0.986     |
| time/               |           |
|    episodes         | 1800      |
|    fps              | 822       |
|    time_elapsed     | 44        |
|    total_timesteps  | 36900     |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.7     |
|    ep_rew_mean      | 12398.85 |
|    exploration_rate | 0.985    |
| time/               |          |
|    episodes         | 1900     |
|    fps              | 824      |
|    time_elapsed     | 47       |
|    total_timesteps  | 39300    |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.8      |
|    ep_rew_mean      | 10537.823 |
|    exploration_rate | 0.984     |
| size/               |           |
|    ended            | 112       |
|    inc_index        | 20.6      |
|    session_size     | 29.1      |
|    sim_size         | 20.7      |
| time/               |           |
|    episodes         | 2000      |
|    fps              | 826       |
|    inc_time         | 449       |
|    reward           | 2.23e+03  |
|    session_minutes  | 2.25e+03  |
|    sim_minutes      | 1.82e+03  |
|    time_elapsed     | 50        |
|    total_timesteps  | 41400     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21        |
|    ep_rew_mean      | 10328.152 |
|    exploration_rate | 0.983     |
| time/               |           |
|    episodes         | 2100      |
|    fps              | 828       |
|    time_elapsed     | 52        |
|    total_timesteps  | 43600     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 20.9      |
|    ep_rew_mean      | 10570.951 |
|    exploration_rate | 0.983     |
| time/               |           |
|    episodes         | 2200      |
|    fps              | 832       |
|    time_elapsed     | 54        |
|    total_timesteps  | 45700     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.4      |
|    ep_rew_mean      | 11093.696 |
|    exploration_rate | 0.982     |
| time/               |           |
|    episodes         | 2300      |
|    fps              | 837       |
|    time_elapsed     | 57        |
|    total_timesteps  | 48000     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.6      |
|    ep_rew_mean      | 11774.369 |
|    exploration_rate | 0.981     |
| size/               |           |
|    ended            | 123       |
|    inc_index        | 17.6      |
|    session_size     | 31.1      |
|    sim_size         | 22.1      |
| time/               |           |
|    episodes         | 2400      |
|    fps              | 841       |
|    inc_time         | 444       |
|    reward           | 1.38e+03  |
|    session_minutes  | 1.4e+03   |
|    sim_minutes      | 1.21e+03  |
|    time_elapsed     | 59        |
|    total_timesteps  | 50200     |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.6      |
|    ep_rew_mean      | 11908.382 |
|    exploration_rate | 0.98      |
| time/               |           |
|    episodes         | 2500      |
|    fps              | 827       |
|    time_elapsed     | 62        |
|    total_timesteps  | 52000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 80.5      |
|    n_updates        | 4         |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.5      |
|    ep_rew_mean      | 12093.182 |
|    exploration_rate | 0.979     |
| time/               |           |
|    episodes         | 2600      |
|    fps              | 829       |
|    time_elapsed     | 65        |
|    total_timesteps  | 54400     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 392       |
|    n_updates        | 10        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 11592.525 |
|    exploration_rate | 0.978     |
| time/               |           |
|    episodes         | 2700      |
|    fps              | 829       |
|    time_elapsed     | 68        |
|    total_timesteps  | 56700     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 113       |
|    n_updates        | 16        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 11284.586 |
|    exploration_rate | 0.978     |
| time/               |           |
|    episodes         | 2800      |
|    fps              | 829       |
|    time_elapsed     | 70        |
|    total_timesteps  | 58700     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 3.74e+03  |
|    n_updates        | 21        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.6      |
|    ep_rew_mean      | 12274.531 |
|    exploration_rate | 0.977     |
| size/               |           |
|    ended            | 134       |
|    inc_index        | 24.7      |
|    session_size     | 29.4      |
|    sim_size         | 20.9      |
| time/               |           |
|    episodes         | 2900      |
|    fps              | 829       |
|    inc_time         | 584       |
|    reward           | 1.39e+03  |
|    session_minutes  | 1.41e+03  |
|    sim_minutes      | 1.1e+03   |
|    time_elapsed     | 73        |
|    total_timesteps  | 61100     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 69        |
|    n_updates        | 27        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 13456.938 |
|    exploration_rate | 0.976     |
| time/               |           |
|    episodes         | 3000      |
|    fps              | 831       |
|    time_elapsed     | 76        |
|    total_timesteps  | 63400     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 148       |
|    n_updates        | 33        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.5      |
|    ep_rew_mean      | 14323.256 |
|    exploration_rate | 0.975     |
| time/               |           |
|    episodes         | 3100      |
|    fps              | 833       |
|    time_elapsed     | 78        |
|    total_timesteps  | 65700     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 201       |
|    n_updates        | 39        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.5      |
|    ep_rew_mean      | 14059.208 |
|    exploration_rate | 0.974     |
| time/               |           |
|    episodes         | 3200      |
|    fps              | 834       |
|    time_elapsed     | 80        |
|    total_timesteps  | 67500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 113       |
|    n_updates        | 43        |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.8     |
|    ep_rew_mean      | 13684.87 |
|    exploration_rate | 0.974    |
| time/               |          |
|    episodes         | 3300     |
|    fps              | 836      |
|    time_elapsed     | 83       |
|    total_timesteps  | 69700    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 62.9     |
|    n_updates        | 49       |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.1      |
|    ep_rew_mean      | 12929.726 |
|    exploration_rate | 0.973     |
| size/               |           |
|    ended            | 113       |
|    inc_index        | 20.1      |
|    session_size     | 32.3      |
|    sim_size         | 22.9      |
| time/               |           |
|    episodes         | 3400      |
|    fps              | 836       |
|    inc_time         | 653       |
|    reward           | 3.01e+03  |
|    session_minutes  | 3.04e+03  |
|    sim_minutes      | 2.69e+03  |
|    time_elapsed     | 85        |
|    total_timesteps  | 71800     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 712       |
|    n_updates        | 54        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 12714.659 |
|    exploration_rate | 0.972     |
| time/               |           |
|    episodes         | 3500      |
|    fps              | 834       |
|    time_elapsed     | 88        |
|    total_timesteps  | 73900     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 1.13e+03  |
|    n_updates        | 59        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.7      |
|    ep_rew_mean      | 11253.406 |
|    exploration_rate | 0.971     |
| time/               |           |
|    episodes         | 3600      |
|    fps              | 837       |
|    time_elapsed     | 90        |
|    total_timesteps  | 76100     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 1.02e+03  |
|    n_updates        | 65        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.5      |
|    ep_rew_mean      | 11448.238 |
|    exploration_rate | 0.97      |
| time/               |           |
|    episodes         | 3700      |
|    fps              | 839       |
|    time_elapsed     | 93        |
|    total_timesteps  | 78200     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 661       |
|    n_updates        | 70        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.4      |
|    ep_rew_mean      | 12091.219 |
|    exploration_rate | 0.969     |
| size/               |           |
|    ended            | 148       |
|    inc_index        | 35.5      |
|    session_size     | 28.1      |
|    sim_size         | 20        |
| time/               |           |
|    episodes         | 3800      |
|    fps              | 841       |
|    inc_time         | 227       |
|    reward           | 1.83e+03  |
|    session_minutes  | 1.85e+03  |
|    sim_minutes      | 1.41e+03  |
|    time_elapsed     | 95        |
|    total_timesteps  | 80700     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 264       |
|    n_updates        | 76        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.7      |
|    ep_rew_mean      | 10917.202 |
|    exploration_rate | 0.968     |
| time/               |           |
|    episodes         | 3900      |
|    fps              | 841       |
|    time_elapsed     | 98        |
|    total_timesteps  | 83000     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 187       |
|    n_updates        | 82        |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 10384.922 |
|    exploration_rate | 0.968     |
| time/               |           |
|    episodes         | 4000      |
|    fps              | 840       |
|    time_elapsed     | 101       |
|    total_timesteps  | 85500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 384       |
|    n_updates        | 88        |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | 9719.892 |
|    exploration_rate | 0.967    |
| time/               |          |
|    episodes         | 4100     |
|    fps              | 841      |
|    time_elapsed     | 104      |
|    total_timesteps  | 88000    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 684      |
|    n_updates        | 94       |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.5     |
|    ep_rew_mean      | 9605.846 |
|    exploration_rate | 0.966    |
| size/               |          |
|    ended            | 136      |
|    inc_index        | 24.8     |
|    session_size     | 35.9     |
|    sim_size         | 25.4     |
| time/               |          |
|    episodes         | 4200     |
|    fps              | 841      |
|    inc_time         | 101      |
|    reward           | 2.41e+03 |
|    session_minutes  | 2.44e+03 |
|    sim_minutes      | 1.23e+03 |
|    time_elapsed     | 107      |
|    total_timesteps  | 90200    |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 159      |
|    n_updates        | 100      |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.8      |
|    ep_rew_mean      | 10517.297 |
|    exploration_rate | 0.964     |
| time/               |           |
|    episodes         | 4400      |
|    fps              | 845       |
|    time_elapsed     | 112       |
|    total_timesteps  | 95500     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 133       |
|    n_updates        | 113       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23.2      |
|    ep_rew_mean      | 10806.014 |
|    exploration_rate | 0.963     |
| time/               |           |
|    episodes         | 4500      |
|    fps              | 845       |
|    time_elapsed     | 115       |
|    total_timesteps  | 97900     |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 121       |
|    n_updates        | 119       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23.6      |
|    ep_rew_mean      | 12068.222 |
|    exploration_rate | 0.962     |
| size/               |           |
|    ended            | 126       |
|    inc_index        | 19.9      |
|    session_size     | 33.3      |
|    sim_size         | 23.6      |
| time/               |           |
|    episodes         | 4600      |
|    fps              | 846       |
|    inc_time         | 111       |
|    reward           | 1.95e+03  |
|    session_minutes  | 1.98e+03  |
|    sim_minutes      | 1.94e+03  |
|    time_elapsed     | 118       |
|    total_timesteps  | 100200    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 169       |
|    n_updates        | 125       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 24.4      |
|    ep_rew_mean      | 12995.378 |
|    exploration_rate | 0.961     |
| time/               |           |
|    episodes         | 4700      |
|    fps              | 847       |
|    time_elapsed     | 120       |
|    total_timesteps  | 102500    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 337       |
|    n_updates        | 131       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 24.8      |
|    ep_rew_mean      | 12166.913 |
|    exploration_rate | 0.96      |
| time/               |           |
|    episodes         | 4800      |
|    fps              | 848       |
|    time_elapsed     | 123       |
|    total_timesteps  | 104500    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 906       |
|    n_updates        | 136       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 24.1      |
|    ep_rew_mean      | 11469.119 |
|    exploration_rate | 0.959     |
| time/               |           |
|    episodes         | 4900      |
|    fps              | 849       |
|    time_elapsed     | 125       |
|    total_timesteps  | 106700    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 168       |
|    n_updates        | 141       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23.5      |
|    ep_rew_mean      | 11693.952 |
|    exploration_rate | 0.959     |
| time/               |           |
|    episodes         | 5000      |
|    fps              | 851       |
|    time_elapsed     | 128       |
|    total_timesteps  | 109200    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 126       |
|    n_updates        | 147       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23.1      |
|    ep_rew_mean      | 12010.926 |
|    exploration_rate | 0.958     |
| size/               |           |
|    ended            | 121       |
|    inc_index        | 22.1      |
|    session_size     | 30.3      |
|    sim_size         | 21.5      |
| time/               |           |
|    episodes         | 5100      |
|    fps              | 852       |
|    inc_time         | 498       |
|    reward           | 1.98e+03  |
|    session_minutes  | 2.01e+03  |
|    sim_minutes      | 1.61e+03  |
|    time_elapsed     | 130       |
|    total_timesteps  | 111300    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 186       |
|    n_updates        | 153       |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.1     |
|    ep_rew_mean      | 12897.86 |
|    exploration_rate | 0.957    |
| time/               |          |
|    episodes         | 5200     |
|    fps              | 853      |
|    time_elapsed     | 132      |
|    total_timesteps  | 113400   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 84.9     |
|    n_updates        | 158      |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23        |
|    ep_rew_mean      | 11711.038 |
|    exploration_rate | 0.956     |
| time/               |           |
|    episodes         | 5300      |
|    fps              | 852       |
|    time_elapsed     | 135       |
|    total_timesteps  | 115200    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 87.6      |
|    n_updates        | 162       |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.7     |
|    ep_rew_mean      | 12383.95 |
|    exploration_rate | 0.955    |
| time/               |          |
|    episodes         | 5400     |
|    fps              | 852      |
|    time_elapsed     | 137      |
|    total_timesteps  | 117600   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 116      |
|    n_updates        | 168      |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.4      |
|    ep_rew_mean      | 12609.732 |
|    exploration_rate | 0.955     |
| time/               |           |
|    episodes         | 5500      |
|    fps              | 852       |
|    time_elapsed     | 140       |
|    total_timesteps  | 119500    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 121       |
|    n_updates        | 173       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 11453.244 |
|    exploration_rate | 0.954     |
| size/               |           |
|    ended            | 131       |
|    inc_index        | 24.8      |
|    session_size     | 30.7      |
|    sim_size         | 21.8      |
| time/               |           |
|    episodes         | 5600      |
|    fps              | 854       |
|    inc_time         | 236       |
|    reward           | 2.37e+03  |
|    session_minutes  | 2.4e+03   |
|    sim_minutes      | 2.36e+03  |
|    time_elapsed     | 142       |
|    total_timesteps  | 121700    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 154       |
|    n_updates        | 179       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.5      |
|    ep_rew_mean      | 10534.688 |
|    exploration_rate | 0.953     |
| time/               |           |
|    episodes         | 5700      |
|    fps              | 855       |
|    time_elapsed     | 144       |
|    total_timesteps  | 123900    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 434       |
|    n_updates        | 184       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.3      |
|    ep_rew_mean      | 11331.196 |
|    exploration_rate | 0.952     |
| time/               |           |
|    episodes         | 5800      |
|    fps              | 854       |
|    time_elapsed     | 148       |
|    total_timesteps  | 126400    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 150       |
|    n_updates        | 190       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 11852.646 |
|    exploration_rate | 0.951     |
| time/               |           |
|    episodes         | 5900      |
|    fps              | 854       |
|    time_elapsed     | 150       |
|    total_timesteps  | 128700    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 49.3      |
|    n_updates        | 196       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 10736.685 |
|    exploration_rate | 0.95      |
| size/               |           |
|    ended            | 125       |
|    inc_index        | 22.4      |
|    session_size     | 30.6      |
|    sim_size         | 21.8      |
| time/               |           |
|    episodes         | 6000      |
|    fps              | 853       |
|    inc_time         | 96.2      |
|    reward           | 1.36e+03  |
|    session_minutes  | 1.39e+03  |
|    sim_minutes      | 815       |
|    time_elapsed     | 153       |
|    total_timesteps  | 130700    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 212       |
|    n_updates        | 201       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.6      |
|    ep_rew_mean      | 10288.412 |
|    exploration_rate | 0.95      |
| time/               |           |
|    episodes         | 6100      |
|    fps              | 853       |
|    time_elapsed     | 155       |
|    total_timesteps  | 132700    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 812       |
|    n_updates        | 206       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.5      |
|    ep_rew_mean      | 10158.033 |
|    exploration_rate | 0.949     |
| time/               |           |
|    episodes         | 6200      |
|    fps              | 854       |
|    time_elapsed     | 157       |
|    total_timesteps  | 134900    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 91.4      |
|    n_updates        | 212       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 11439.435 |
|    exploration_rate | 0.948     |
| time/               |           |
|    episodes         | 6300      |
|    fps              | 855       |
|    time_elapsed     | 160       |
|    total_timesteps  | 137200    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 73.6      |
|    n_updates        | 217       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.8      |
|    ep_rew_mean      | 10871.846 |
|    exploration_rate | 0.947     |
| time/               |           |
|    episodes         | 6400      |
|    fps              | 855       |
|    time_elapsed     | 162       |
|    total_timesteps  | 139400    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 86.8      |
|    n_updates        | 223       |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.7     |
|    ep_rew_mean      | 9765.573 |
|    exploration_rate | 0.946    |
| size/               |          |
|    ended            | 104      |
|    inc_index        | 18.1     |
|    session_size     | 30.4     |
|    sim_size         | 21.6     |
| time/               |          |
|    episodes         | 6500     |
|    fps              | 855      |
|    inc_time         | 287      |
|    reward           | 2.07e+03 |
|    session_minutes  | 2.1e+03  |
|    sim_minutes      | 1.38e+03 |
|    time_elapsed     | 165      |
|    total_timesteps  | 141500   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 512      |
|    n_updates        | 228      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22       |
|    ep_rew_mean      | 9849.221 |
|    exploration_rate | 0.945    |
| time/               |          |
|    episodes         | 6600     |
|    fps              | 856      |
|    time_elapsed     | 167      |
|    total_timesteps  | 143600   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 203      |
|    n_updates        | 233      |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.1      |
|    ep_rew_mean      | 11834.222 |
|    exploration_rate | 0.945     |
| time/               |           |
|    episodes         | 6700      |
|    fps              | 857       |
|    time_elapsed     | 170       |
|    total_timesteps  | 145900    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 92.4      |
|    n_updates        | 239       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 11988.856 |
|    exploration_rate | 0.944     |
| time/               |           |
|    episodes         | 6800      |
|    fps              | 856       |
|    time_elapsed     | 172       |
|    total_timesteps  | 147800    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 99        |
|    n_updates        | 244       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.7      |
|    ep_rew_mean      | 11637.367 |
|    exploration_rate | 0.943     |
| size/               |           |
|    ended            | 127       |
|    inc_index        | 25.2      |
|    session_size     | 30.3      |
|    sim_size         | 21.5      |
| time/               |           |
|    episodes         | 6900      |
|    fps              | 857       |
|    inc_time         | 421       |
|    reward           | 1.93e+03  |
|    session_minutes  | 1.96e+03  |
|    sim_minutes      | 1.67e+03  |
|    time_elapsed     | 175       |
|    total_timesteps  | 150200    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 1.38e+03  |
|    n_updates        | 250       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 21.7      |
|    ep_rew_mean      | 12577.084 |
|    exploration_rate | 0.942     |
| time/               |           |
|    episodes         | 7000      |
|    fps              | 857       |
|    time_elapsed     | 178       |
|    total_timesteps  | 152800    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 148       |
|    n_updates        | 256       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22        |
|    ep_rew_mean      | 12115.897 |
|    exploration_rate | 0.941     |
| time/               |           |
|    episodes         | 7100      |
|    fps              | 857       |
|    time_elapsed     | 180       |
|    total_timesteps  | 155000    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 230       |
|    n_updates        | 262       |
-----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 22.2      |
|    ep_rew_mean      | 12046.026 |
|    exploration_rate | 0.94      |
| time/               |           |
|    episodes         | 7200      |
|    fps              | 858       |
|    time_elapsed     | 183       |
|    total_timesteps  | 157500    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 538       |
|    n_updates        | 268       |
-----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.1     |
|    ep_rew_mean      | 10854.06 |
|    exploration_rate | 0.939    |
| time/               |          |
|    episodes         | 7300     |
|    fps              | 859      |
|    time_elapsed     | 186      |
|    total_timesteps  | 159800   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 573      |
|    n_updates        | 274      |
----------------------------------


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.6     |
|    ep_rew_mean      | 9934.7   |
|    exploration_rate | 0.938    |
| size/               |          |
|    ended            | 134      |
|    inc_index        | 25.2     |
|    session_size     | 32.6     |
|    sim_size         | 23.2     |
| time/               |          |
|    episodes         | 7400     |
|    fps              | 859      |
|    inc_time         | 306      |
|    reward           | 1.46e+03 |
|    session_minutes  | 1.49e+03 |
|    sim_minutes      | 1.18e+03 |
|    time_elapsed     | 188      |
|    total_timesteps  | 162200   |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 1.19e+03 |
|    n_updates        | 280      |
----------------------------------


-----------------------------------
| rollout/            |           |
|    ep_len_mean      | 23.2      |
|    ep_rew_mean      | 11937.264 |
|    exploration_rate | 0.937     |
| time/               |           |
|    episodes         | 7500      |
|    fps              | 860       |
|    time_elapsed     | 191       |
|    total_timesteps  | 164500    |
| train/              |           |
|    learning_rate    | 0.0001    |
|    loss             | 914       |
|    n_updates        | 286       |
-----------------------------------
