In [1]:
import cbpro 
import ray 
import numpy as np 

from datetime import datetime, timedelta
import time

from ray import tune 
from ray.tune.registry import register_env 

import tensortrade.env.default as default

from tensortrade.feed.core import DataFeed, Stream
from tensortrade.oms.instruments import Instrument
from tensortrade.oms.instruments import USD, BTC, ETH
from tensortrade.oms.exchanges import Exchange
from tensortrade.oms.services.execution.simulated import execute_order
from tensortrade.oms.wallets import Wallet, Portfolio

from tensortrade.feed.core import NameSpace




In [2]:
def get_data_range(start, end, granularity, product):
    delta = timedelta(seconds=granularity)
    cur_time = start
    data = np.array([], dtype=np.float32).reshape(0,6)
    while cur_time < end:
        print(cur_time)
        cur_segment = public_client.get_product_historic_rates(product, start=cur_time, end=(cur_time + (delta * 300)), granularity=granularity)
        #print(len(cur_segment))
        cur_time = cur_time + (delta * len(cur_segment))
        #print(cur_time)
        cur_segment = np.flip(np.array(cur_segment), axis=0)
        #print(cur_segment.shape)
        data = np.concatenate((data, cur_segment), axis=0)
        print(data.shape)
        time.sleep(0.34)
    return data 

public_client = cbpro.PublicClient()

now = datetime.now() 
delta = timedelta(days = 60)
start = now - delta
print(start)

ETH_USD = get_data_range(start, now, 3600, 'ETH-USD')
print('done')
BTC_USD = get_data_range(start, now, 3600, 'BTC-USD')
print('done')
ETH_BTC = get_data_range(start, now, 3600, 'ETH-BTC')
print('done')

2021-03-03 12:41:07.245424
2021-03-03 12:41:07.245424
(300, 6)
2021-03-16 00:41:07.245424
(600, 6)
2021-03-28 12:41:07.245424
(900, 6)
2021-04-10 00:41:07.245424
(1200, 6)
2021-04-22 12:41:07.245424
(1444, 6)
done
2021-03-03 12:41:07.245424
(300, 6)
2021-03-16 00:41:07.245424
(600, 6)
2021-03-28 12:41:07.245424
(900, 6)
2021-04-10 00:41:07.245424
(1200, 6)
2021-04-22 12:41:07.245424
(1444, 6)
done
2021-03-03 12:41:07.245424
(300, 6)
2021-03-16 00:41:07.245424
(600, 6)
2021-03-28 12:41:07.245424
(900, 6)
2021-04-10 00:41:07.245424
(1200, 6)
2021-04-22 12:41:07.245424
(1444, 6)
done


In [8]:
def setup_env(config):
    ETH_USD = config['ETH_USD']
    BTC_USD = config['BTC_USD']
    ETH_BTC = config['ETH_BTC']
    #ETH_USD, BTC_USD, ETH_BTC
    coinbase = Exchange("Coinbase", service=execute_order)(
        Stream.source(ETH_USD[:, 4] , dtype="float").rename("USD-ETH"),
        Stream.source(BTC_USD[:, 4], dtype="float").rename("USD-BTC"),
    )
    with NameSpace("coinbase"):
        coinbase_streams = [
            Stream.source(ETH_USD[:, 0] , dtype="float").rename("ETH:date"),
            Stream.source(ETH_USD[:, 1] , dtype="float").rename("ETH:open"),
            Stream.source(ETH_USD[:, 2] , dtype="float").rename("ETH:high"),
            Stream.source(ETH_USD[:, 3] , dtype="float").rename("ETH:low"),
            Stream.source(ETH_USD[:, 4] , dtype="float").rename("ETH:close"),
            Stream.source(ETH_USD[:, 5] , dtype="float").rename("ETH:volume"),
        
            Stream.source(BTC_USD[:, 0] , dtype="float").rename("BTC:date"),
            Stream.source(BTC_USD[:, 1] , dtype="float").rename("BTC:open"),
            Stream.source(BTC_USD[:, 2] , dtype="float").rename("BTC:high"),
            Stream.source(BTC_USD[:, 3] , dtype="float").rename("BTC:low"),
            Stream.source(BTC_USD[:, 4] , dtype="float").rename("BTC:close"),
            Stream.source(BTC_USD[:, 5] , dtype="float").rename("BTC:volume"),
        ]
        
        
    feed = DataFeed(coinbase_streams)

    portfolio = Portfolio(USD, [
        Wallet(coinbase, 3000 * USD),
        Wallet(coinbase, 0.01 * BTC),
        Wallet(coinbase, 0.3 * ETH),
    ])

    renderer_feed = DataFeed([
        Stream.source(ETH_USD[:, 0] , dtype="float").rename("date"),
        #Stream.source(ETH_USD[:, 1] , dtype="float").rename("open"),
        #Stream.source(ETH_USD[:, 2] , dtype="float").rename("high"),
        #Stream.source(ETH_USD[:, 3] , dtype="float").rename("low"),
        Stream.source(ETH_USD[:, 4] , dtype="float").rename("close"),
        Stream.source(ETH_USD[:, 5] , dtype="float").rename("volume"),
    ])
    
    
    

    env = default.create(
        portfolio=portfolio,
        #action_scheme="managed-risk",
        action_scheme="simple",
        reward_scheme="risk-adjusted",
        feed=feed,
        #renderer_feed=renderer_feed,
        #renderer=default.renderers.PlotlyTradingChart(),
        window_size=20
    )

    return env


register_env("TradingEnv", setup_env)




In [9]:
class MyKerasQModel(DistributionalQTFModel):
    """Custom model for DQN."""

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, **kw):
        super(MyKerasQModel, self).__init__(
            obs_space, action_space, num_outputs, model_config, name, **kw)

        # Define the core model layers which will be used by the other
        # output heads of DistributionalQModel
        self.inputs = tf.keras.layers.Input(
            shape=obs_space.shape, name="observations")
        layer_1 = tf.keras.layers.Dense(
            128,
            name="my_layer1",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(self.inputs)
        layer_out = tf.keras.layers.Dense(
            num_outputs,
            name="my_out",
            activation=tf.nn.relu,
            kernel_initializer=normc_initializer(1.0))(layer_1)
        self.base_model = tf.keras.Model(self.inputs, layer_out)

    # Implement the core forward method.
    def forward(self, input_dict, state, seq_lens):
        model_out = self.base_model(input_dict["obs"])
        return model_out, state

    def metrics(self):
        return {"foo": tf.constant(42.0)}


NameError: name 'DistributionalQTFModel' is not defined

In [6]:
from ray.rllib.agents import with_common_config
import tensorflow as tf

env_cfg = {
        "ETH_USD": ETH_USD,
        "BTC_USD": BTC_USD, 
        "ETH_BTC": ETH_BTC,
    }

DEFAULT_CONFIG = with_common_config({
    "env": "TradingEnv",
    "env_config": env_cfg,

    # === Model ===
    # Number of atoms for representing the distribution of return. When
    # this is greater than 1, distributional Q-learning is used.
    # the discrete supports are bounded by v_min and v_max
    "num_atoms": 1,
    "v_min": -10.0,
    "v_max": 10.0,
    # Whether to use noisy network
    "noisy": True,
    # control the initial value of noisy nets
    "sigma0": 0.5,
    # Whether to use dueling dqn
    "dueling": True,
    # Dense-layer setup for each the advantage branch and the value branch
    # in a dueling architecture.
    "hiddens": [512, 256, 128],
    # Whether to use double dqn
    "double_q": True,
    # N-step Q learning
    "n_step": 5,

    # === Exploration Settings ===
    "exploration_config": {
        # The Exploration class to use.
        "type": "EpsilonGreedy",
        # Config for the Exploration class' constructor:
        "initial_epsilon": 1.0,
        "final_epsilon": 0.02,
        "epsilon_timesteps": 10000,  # Timesteps over which to anneal epsilon.

        # For soft_q, use:
        # "exploration_config" = {
        #   "type": "SoftQ"
        #   "temperature": [float, e.g. 1.0]
        # }
    },
    # Switch to greedy actions in evaluation workers.
    "evaluation_config": {
        "explore": False,
    },

    # Minimum env steps to optimize for per train call. This value does
    # not affect learning, only the length of iterations.
    "timesteps_per_iteration": 15,
    # Update the target network every `target_network_update_freq` steps.
    "target_network_update_freq": 25,
    # === Replay buffer ===
    # Size of the replay buffer. Note that if async_updates is set, then
    # each worker will have a replay buffer of this size.
    "buffer_size": 20000,
    # If True prioritized replay buffer will be used.
    "prioritized_replay": True,
    # Alpha parameter for prioritized replay buffer.
    "prioritized_replay_alpha": 0.6,
    # Beta parameter for sampling from prioritized replay buffer.
    "prioritized_replay_beta": 0.4,
    # Final value of beta (by default, we use constant beta=0.4).
    "final_prioritized_replay_beta": 0.4,
    # Time steps over which the beta parameter is annealed.
    "prioritized_replay_beta_annealing_timesteps": 20000,
    # Epsilon to add to the TD errors when updating priorities.
    "prioritized_replay_eps": 1e-6,
    # Whether to LZ4 compress observations
    "compress_observations": False,
    # Callback to run before learning on a multi-agent batch of experiences.
    "before_learn_on_batch": None,
    # If set, this will fix the ratio of replayed from a buffer and learned on
    # timesteps to sampled from an environment and stored in the replay buffer
    # timesteps. Otherwise, the replay will proceed at the native ratio
    # determined by (train_batch_size / rollout_fragment_length).
    "training_intensity": None,

    # === Optimization ===
    # Learning rate for adam optimizer
    "lr": tune.loguniform(1e-6, 5e-4), 
    # Learning rate schedule
    "lr_schedule": None,
    # Adam epsilon hyper parameter
    "adam_epsilon": 1e-8,
    # If not None, clip gradients during optimization at this value
    "grad_clip": 40,
    # How many steps of the model to sample before learning starts.
    "learning_starts": 1000,
    # Update the replay buffer with this many samples at once. Note that
    # this setting applies per-worker if num_workers > 1.
    "rollout_fragment_length": 4,
    # Size of a batch sampled from replay buffer for training. Note that
    # if async_updates is set, then each worker returns gradients for a
    # batch of this size.
    "train_batch_size": 32,
    "evaluation_num_episodes" : 10,
    
    # === Parallelism ===
    # Number of workers for collecting samples with. This only makes sense
    # to increase if your environment is particularly slow to sample, or if
    # you"re using the Async or Ape-X optimizers.
    "num_workers": 2,
    # Whether to compute priorities on workers.
    "worker_side_prioritization": False,
    # Prevent iterations from going lower than this time span
    "min_iter_time_s": 1,
    
    "num_gpus": 2,
    #"num_gpus_per_worker" : 1, 
})

DEFAULT_CONFIG


{'num_workers': 2,
 'num_envs_per_worker': 1,
 'create_env_on_driver': False,
 'rollout_fragment_length': 4,
 'batch_mode': 'truncate_episodes',
 'train_batch_size': 32,
 'model': {'fcnet_hiddens': [256, 256],
  'fcnet_activation': 'tanh',
  'conv_filters': None,
  'conv_activation': 'relu',
  'post_fcnet_hiddens': [],
  'post_fcnet_activation': 'relu',
  'free_log_std': False,
  'no_final_linear': False,
  'vf_share_layers': True,
  'use_lstm': False,
  'max_seq_len': 20,
  'lstm_cell_size': 256,
  'lstm_use_prev_action': False,
  'lstm_use_prev_reward': False,
  '_time_major': False,
  'use_attention': False,
  'attention_num_transformer_units': 1,
  'attention_dim': 64,
  'attention_num_heads': 1,
  'attention_head_dim': 32,
  'attention_memory_inference': 50,
  'attention_memory_training': 50,
  'attention_position_wise_mlp_dim': 32,
  'attention_init_gru_gate_bias': 2.0,
  'attention_use_n_prev_actions': 0,
  'attention_use_n_prev_rewards': 0,
  'num_framestacks': 'auto',
  'dim':

In [5]:
import tensorflow
print(tensorflow.config.list_physical_devices('GPU'))
print(tensorflow.__version__)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
2.4.1


In [10]:
from ray.tune.suggest.hyperopt import HyperOptSearch

search_alg = HyperOptSearch()

stop_criteria = ray.tune.stopper.MaximumIterationStopper(100)
analysis = tune.run(
    "DQN",
    config=DEFAULT_CONFIG, 
    stop = stop_criteria, 
    num_samples = 2, 
    metric='mean_reward', 
    mode='max', 
    search_alg=search_alg, 
)




Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1cab056,RUNNING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False


[2m[36m(pid=19716)[0m Instructions for updating:
[2m[36m(pid=19716)[0m non-resource variables are not supported in the long term
[2m[36m(pid=19716)[0m 2021-05-02 13:00:25,392	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=19716)[0m 2021-05-02 13:00:25,393	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=860)[0m Instructions for updating:
[2m[36m(pid=860)[0m non-resource variables are not supported in the long term
[2m[36m(pid=25392)[0m Instructions for updating:
[2m[36m(pid=25392)[0m non-resource variables are not supported in the long term


Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1cab056,RUNNING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False
DQN_TradingEnv_e1e3c84b,PENDING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.23428e-06,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False


Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1cab056,RUNNING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False
DQN_TradingEnv_e1e3c84b,PENDING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.23428e-06,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False


[2m[36m(pid=25392)[0m sup chin
[2m[36m(pid=25392)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=25392)[0m 2.4.1
[2m[36m(pid=860)[0m sup chin
[2m[36m(pid=860)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=860)[0m 2.4.1
[2m[36m(pid=19716)[0m sup chin
[2m[36m(pid=19716)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=19716)[0m 2.4.1


[2m[36m(pid=19716)[0m 2021-05-02 13:00:47,104	INFO trainable.py:101 -- Trainable.setup took 22.301 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-05-02 13:00:55,768	ERROR trial_runner.py:732 -- Trial DQN_TradingEnv_e1cab056: Error processing event.
Traceback (most recent call last):
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 710, in _process_trial
    decision = self._process_trial_result(trial, result)
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 750, in _process_trial_result
    self._validate_result_metrics(flat_result)
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 859, in _validate_result_metrics
    raise ValueError(
ValueError: Trial returned a result which did not include the specified metric(s) `mean_reward` that `tune.run

Result for DQN_TradingEnv_e1cab056:
  {}
  


Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1e3c84b,PENDING,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.23428e-06,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False
DQN_TradingEnv_e1cab056,ERROR,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False

Trial name,# failures,error file
DQN_TradingEnv_e1cab056,1,"C:\Users\Harry\ray_results\DQN\DQN_TradingEnv_e1cab056_1__fake_gpus=False,adam_epsilon=1e-08,batch_mode=truncate_episodes,before_learn_on_batch=None,buffer_size=_2021-05-02_13-00-22\error.txt"


[2m[36m(pid=21312)[0m Instructions for updating:
[2m[36m(pid=21312)[0m non-resource variables are not supported in the long term
[2m[36m(pid=21312)[0m 2021-05-02 13:00:59,808	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=21312)[0m 2021-05-02 13:00:59,808	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=24668)[0m Instructions for updating:
[2m[36m(pid=24668)[0m non-resource variables are not supported in the long term
[2m[36m(pid=14632)[0m Instructions for updating:
[2m[36m(pid=14632)[0m non-resource variables are not supported in the long term


[2m[36m(pid=24668)[0m sup chin
[2m[36m(pid=24668)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=24668)[0m 2.4.1
[2m[36m(pid=14632)[0m sup chin
[2m[36m(pid=14632)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=14632)[0m 2.4.1
[2m[36m(pid=21312)[0m sup chin
[2m[36m(pid=21312)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=21312)[0m 2.4.1


[2m[36m(pid=21312)[0m 2021-05-02 13:01:22,249	INFO trainable.py:101 -- Trainable.setup took 23.076 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.
2021-05-02 13:01:31,542	ERROR trial_runner.py:732 -- Trial DQN_TradingEnv_e1e3c84b: Error processing event.
Traceback (most recent call last):
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 710, in _process_trial
    decision = self._process_trial_result(trial, result)
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 750, in _process_trial_result
    self._validate_result_metrics(flat_result)
  File "C:\Users\Harry\anaconda3\envs\freqtrade-conda\lib\site-packages\ray\tune\trial_runner.py", line 859, in _validate_result_metrics
    raise ValueError(
ValueError: Trial returned a result which did not include the specified metric(s) `mean_reward` that `tune.run

Result for DQN_TradingEnv_e1e3c84b:
  {}
  


Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1cab056,ERROR,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False
DQN_TradingEnv_e1e3c84b,ERROR,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.23428e-06,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False

Trial name,# failures,error file
DQN_TradingEnv_e1cab056,1,"C:\Users\Harry\ray_results\DQN\DQN_TradingEnv_e1cab056_1__fake_gpus=False,adam_epsilon=1e-08,batch_mode=truncate_episodes,before_learn_on_batch=None,buffer_size=_2021-05-02_13-00-22\error.txt"
DQN_TradingEnv_e1e3c84b,1,"C:\Users\Harry\ray_results\DQN\DQN_TradingEnv_e1e3c84b_2__fake_gpus=False,adam_epsilon=1e-08,batch_mode=truncate_episodes,before_learn_on_batch=None,buffer_size=_2021-05-02_13-00-22\error.txt"


Trial name,status,loc,_fake_gpus,adam_epsilon,batch_mode,before_learn_on_batch,buffer_size,callbacks,clip_actions,clip_rewards,collect_metrics_timeout,compress_observations,create_env_on_driver,custom_eval_function,double_q,dueling,eager_tracing,env,env_config/BTC_USD,env_config/ETH_BTC,env_config/ETH_USD,evaluation_config/explore,evaluation_interval,evaluation_num_episodes,evaluation_num_workers,exploration_config/epsilon_timesteps,exploration_config/final_epsilon,exploration_config/initial_epsilon,exploration_config/type,explore,fake_sampler,final_prioritized_replay_beta,framework,gamma,grad_clip,hiddens,horizon,ignore_worker_failures,in_evaluation,input,input_evaluation,learning_starts,local_tf_session_args/inter_op_parallelism_threads,local_tf_session_args/intra_op_parallelism_threads,log_level,log_sys_usage,logger_config,lr,lr_schedule,metrics_smoothing_episodes,min_iter_time_s,model/_time_major,model/attention_dim,model/attention_head_dim,model/attention_init_gru_gate_bias,model/attention_memory_inference,model/attention_memory_training,model/attention_num_heads,model/attention_num_transformer_units,model/attention_position_wise_mlp_dim,model/attention_use_n_prev_actions,model/attention_use_n_prev_rewards,model/conv_activation,model/conv_filters,model/custom_action_dist,model/custom_model,model/custom_preprocessor,model/dim,model/fcnet_activation,model/fcnet_hiddens,model/framestack,model/free_log_std,model/grayscale,model/lstm_cell_size,model/lstm_use_prev_action,model/lstm_use_prev_action_reward,model/lstm_use_prev_reward,model/max_seq_len,model/no_final_linear,model/num_framestacks,model/post_fcnet_activation,model/post_fcnet_hiddens,model/use_attention,model/use_lstm,model/vf_share_layers,model/zero_mean,monitor,multiagent/count_steps_by,multiagent/observation_fn,multiagent/policies_to_train,multiagent/policy_mapping_fn,multiagent/replay_mode,n_step,no_done_at_end,noisy,normalize_actions,num_atoms,num_cpus_for_driver,num_cpus_per_worker,num_envs_per_worker,num_gpus,num_gpus_per_worker,num_workers,observation_filter,output,output_compress_columns,output_max_file_size,placement_strategy,postprocess_inputs,preprocessor_pref,prioritized_replay,prioritized_replay_alpha,prioritized_replay_beta,prioritized_replay_beta_annealing_timesteps,prioritized_replay_eps,record_env,remote_env_batch_wait_ms,remote_worker_envs,render_env,rollout_fragment_length,sample_async,sample_collector,seed,shuffle_buffer_size,sigma0,simple_optimizer,soft_horizon,synchronize_filters,target_network_update_freq,tf_session_args/allow_soft_placement,tf_session_args/device_count/CPU,tf_session_args/gpu_options/allow_growth,tf_session_args/inter_op_parallelism_threads,tf_session_args/intra_op_parallelism_threads,tf_session_args/log_device_placement,timesteps_per_iteration,train_batch_size,training_intensity,v_max,v_min,worker_side_prioritization
DQN_TradingEnv_e1cab056,ERROR,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.00933e-05,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False
DQN_TradingEnv_e1e3c84b,ERROR,,False,1e-08,truncate_episodes,,20000,<class 'ray.rllib.agents.callbacks.DefaultCallbacks'>,True,,180,False,False,,True,True,False,TradingEnv,[[1.61477640e+09 5.12341600e+04 5.26660000e+04 5.25938400e+04  5.15608400e+04 1.35538176e+03]  [1.61478000e+09 5.05997100e+04 5.15615500e+04 5.15615400e+04  5.07413900e+04 2.35682039e+03]  [1.61478360e+09 5.05100300e+04 5.11425800e+04 5.07413800e+04  5.08080200e+04 8.99710004e+02]  ...  [1.61996400e+09 5.67000100e+04 5.70473100e+04 5.67261800e+04  5.68532400e+04 2.18527402e+02]  [1.61996760e+09 5.64986300e+04 5.68896000e+04 5.68606500e+04  5.66392700e+04 3.33395576e+02]  [1.61997120e+09 5.63715300e+04 5.68367800e+04 5.66392700e+04  5.67691100e+04 1.80423061e+02]],[[1.61477640e+09 3.11800000e-02 3.14400000e-02 3.14000000e-02  3.13000000e-02 1.06470343e+03]  [1.61478000e+09 3.11400000e-02 3.13800000e-02 3.12800000e-02  3.13100000e-02 1.14537850e+03]  [1.61478360e+09 3.12900000e-02 3.15700000e-02 3.13200000e-02  3.15200000e-02 6.47944067e+02]  ...  [1.61996400e+09 5.12800000e-02 5.14600000e-02 5.14400000e-02  5.13700000e-02 7.60391311e+02]  [1.61996760e+09 5.13300000e-02 5.18500000e-02 5.13800000e-02  5.17700000e-02 1.22748116e+03]  [1.61997120e+09 5.17000000e-02 5.18900000e-02 5.17700000e-02  5.17300000e-02 9.79926283e+02]],[[1.61477640e+09 1.60432000e+03 1.65393000e+03 1.65104000e+03  1.61344000e+03 1.17337897e+04]  [1.61478000e+09 1.58206000e+03 1.61380000e+03 1.61344000e+03  1.58954000e+03 1.36722709e+04]  [1.61478360e+09 1.58741000e+03 1.60421000e+03 1.58962000e+03  1.60194000e+03 8.14943651e+03]  ...  [1.61996400e+09 2.91394000e+03 2.92850000e+03 2.91742000e+03  2.92083000e+03 3.61461916e+03]  [1.61996760e+09 2.91280000e+03 2.93987000e+03 2.92071000e+03  2.93200000e+03 4.85481951e+03]  [1.61997120e+09 2.92253000e+03 2.94400000e+03 2.93199000e+03  2.93649000e+03 4.59094654e+03]],False,,10,0,10000,0.02,1,EpsilonGreedy,True,False,0.4,tf,0.99,40,"(512, 256, 128)",,False,False,sampler,"('is', 'wis')",1000,8,8,WARN,True,,1.23428e-06,,100,1,False,64,32,2,50,50,1,1,32,0,0,relu,,,,,84,tanh,"(256, 256)",True,False,False,256,False,-1,False,20,False,auto,relu,(),False,False,True,True,-1,env_steps,,,,independent,5,False,True,False,1,1,1,1,2,0,2,NoFilter,,"('obs', 'new_obs')",67108864,PACK,False,deepmind,True,0.6,0.4,20000,1e-06,False,0,False,False,4,False,<class 'ray.rllib.evaluation.collectors.simple_list_collector.SimpleListCollector'>,,0,0.5,-1,False,True,25,True,1,True,2,2,False,15,32,,10,-10,False

Trial name,# failures,error file
DQN_TradingEnv_e1cab056,1,"C:\Users\Harry\ray_results\DQN\DQN_TradingEnv_e1cab056_1__fake_gpus=False,adam_epsilon=1e-08,batch_mode=truncate_episodes,before_learn_on_batch=None,buffer_size=_2021-05-02_13-00-22\error.txt"
DQN_TradingEnv_e1e3c84b,1,"C:\Users\Harry\ray_results\DQN\DQN_TradingEnv_e1e3c84b_2__fake_gpus=False,adam_epsilon=1e-08,batch_mode=truncate_episodes,before_learn_on_batch=None,buffer_size=_2021-05-02_13-00-22\error.txt"


TuneError: ('Trials did not complete', [DQN_TradingEnv_e1cab056, DQN_TradingEnv_e1e3c84b])





In [6]:
checkpoints = analysis.get_trial_checkpoints_paths(
    trial=analysis.get_best_trial("episode_reward_mean"),
    metric="episode_reward_mean"
)
checkpoint_path = checkpoints[0][0]
"""
# Restore agent
agent = ppo.PPOTrainer(
    env="TradingEnv",
    config={
        "env_config": {
            "window_size": 25
        },
        "framework": "torch",
        "log_level": "DEBUG",
        "ignore_worker_failures": True,
        "num_workers": 1,
        "num_gpus": 0,
        "clip_rewards": True,
        "lr": 8e-6,
        "lr_schedule": [
            [0, 1e-1],
            [int(1e2), 1e-2],
            [int(1e3), 1e-3],
            [int(1e4), 1e-4],
            [int(1e5), 1e-5],
            [int(1e6), 1e-6],
            [int(1e7), 1e-7]
        ],
        "gamma": 0,
        "observation_filter": "MeanStdFilter",
        "lambda": 0.72,
        "vf_loss_coeff": 0.5,
        "entropy_coeff": 0.01
    }
)
agent.restore(checkpoint_path)



# Instantiate the environment
env = create_env({
    "window_size": 25
})

# Run until episode ends
episode_reward = 0
done = False
obs = env.reset()

while not done:
    action = agent.compute_action(obs)
    obs, reward, done, info = env.step(action)
    episode_reward += reward

env.render()
"""

NameError: name 'analysis' is not defined

In [11]:
dir(analysis)

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_checkpoints',
 '_configs',
 '_experiment_dir',
 '_experiment_state',
 '_get_trial_paths',
 '_retrieve_rows',
 '_trial_dataframes',
 '_validate_metric',
 '_validate_mode',
 'best_checkpoint',
 'best_config',
 'best_dataframe',
 'best_logdir',
 'best_result',
 'best_result_df',
 'best_trial',
 'dataframe',
 'default_metric',
 'default_mode',
 'fetch_trial_dataframes',
 'get_all_configs',
 'get_best_checkpoint',
 'get_best_config',
 'get_best_logdir',
 'get_best_trial',
 'get_trial_checkpoints_paths',
 'results',
 'results_df',
 'runner_data',
 'stats',
 'trial_dataframes',
 'trials']

































































In [28]:
dir(analysis.trials[0])
#analysis.trials[0].config['model']





Checkpoint(persistent, None)



# PPO

In [30]:
DEFAULT_CONFIG = with_common_config({
    
    "env": "TradingEnv",
    "env_config": env_cfg,

    # Should use a critic as a baseline (otherwise don't use value baseline;
    # required for using GAE).
    "use_critic": True,
    # If true, use the Generalized Advantage Estimator (GAE)
    # with a value function, see https://arxiv.org/pdf/1506.02438.pdf.
    "use_gae": True,
    # The GAE (lambda) parameter.
    "lambda": 1.0,
    # Initial coefficient for KL divergence.
    "kl_coeff": 0.2,
    # Size of batches collected from each worker.
    "rollout_fragment_length": 200,
    # Number of timesteps collected for each SGD round. This defines the size
    # of each SGD epoch.
    "train_batch_size": 4000,
    # Total SGD batch size across all devices for SGD. This defines the
    # minibatch size within each epoch.
    "sgd_minibatch_size": 128,
    # Whether to shuffle sequences in the batch when training (recommended).
    "shuffle_sequences": True,
    # Number of SGD iterations in each outer loop (i.e., number of epochs to
    # execute per train batch).
    "num_sgd_iter": 30,
    # Stepsize of SGD.
    "lr": 5e-5,
    # Learning rate schedule.
    "lr_schedule": None,
    # Coefficient of the value function loss. IMPORTANT: you must tune this if
    # you set vf_share_layers=True inside your model's config.
    "vf_loss_coeff": 1.0,
    "model": {
        # Share layers for value function. If you set this to True, it's
        # important to tune vf_loss_coeff.
        "vf_share_layers": False,
    },
    # Coefficient of the entropy regularizer.
    "entropy_coeff": 0.0,
    # Decay schedule for the entropy regularizer.
    "entropy_coeff_schedule": None,
    # PPO clip parameter.
    "clip_param": 0.3,
    # Clip param for the value function. Note that this is sensitive to the
    # scale of the rewards. If your expected V is large, increase this.
    "vf_clip_param": 100.0,
    # If specified, clip the global norm of gradients by this amount.
    "grad_clip": None,
    # Target value for KL divergence.
    "kl_target": 0.01,
    # Whether to rollout "complete_episodes" or "truncate_episodes".
    "batch_mode": "truncate_episodes",
    # Which observation filter to apply to the observation.
    "observation_filter": "NoFilter",

    # Deprecated keys:
    # Share layers for value function. If you set this to True, it's important
    # to tune vf_loss_coeff.
    # Use config.model.vf_share_layers instead.
    #"vf_share_layers": DEPRECATED_VALUE,
})


In [31]:
stop_criteria = ray.tune.stopper.MaximumIterationStopper(100)
analysis = tune.run(
    "PPO",
    config=DEFAULT_CONFIG, 
    stop = stop_criteria, 
    num_samples = 2, 
)

Trial name,status,loc
PPO_TradingEnv_906df_00000,RUNNING,
PPO_TradingEnv_906df_00001,PENDING,


[2m[36m(pid=22452)[0m Instructions for updating:
[2m[36m(pid=22452)[0m non-resource variables are not supported in the long term
[2m[36m(pid=20380)[0m Instructions for updating:
[2m[36m(pid=20380)[0m non-resource variables are not supported in the long term
[2m[36m(pid=22452)[0m 2021-05-02 12:15:11,953	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=22452)[0m 2021-05-02 12:15:11,960	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=20380)[0m 2021-05-02 12:15:12,001	INFO trainer.py:669 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=20380)[0m 2021-05-02 12:15:12,001	INFO trainer.py:694 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=25064)[0m Instructions for updating:
[2m[

[2m[36m(pid=23792)[0m sup chin
[2m[36m(pid=23792)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=23792)[0m 2.4.1
[2m[36m(pid=25064)[0m sup chin
[2m[36m(pid=25064)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=25064)[0m 2.4.1
[2m[36m(pid=23396)[0m sup chin
[2m[36m(pid=23396)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=23396)[0m 2.4.1
[2m[36m(pid=24128)[0m sup chin
[2m[36m(pid=24128)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=24128)[0m 2.4.1




[2m[36m(pid=22452)[0m sup chin
[2m[36m(pid=22452)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=22452)[0m 2.4.1
[2m[36m(pid=20380)[0m sup chin
[2m[36m(pid=20380)[0m [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
[2m[36m(pid=20380)[0m 2.4.1




Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-05-02_12-15-46
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 502214496.71754247
  episode_reward_mean: 437841079.9355953
  episode_reward_min: 373467663.153648
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 3.684814929962158
          entropy_coeff: 0.0
          kl: 0.029549749568104744
          model: {}
          policy_loss: -0.02817421406507492
          total_loss: 2532700606955520.0
          vf_explained_var: 4.0978193283081055e-08
          vf_loss: 2532700606955520.0
    num_agent_steps_sampled: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 1
  node_ip: 10



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,1.0,25.7095,4000.0,437841000.0,502214000.0,373468000.0,1443.0
PPO_TradingEnv_906df_00001,RUNNING,,,,,,,,




Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 4000
  custom_metrics: {}
  date: 2021-05-02_12-15-46
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 412891072.0799493
  episode_reward_mean: 334801601.03713375
  episode_reward_min: 256712129.99431816
  episodes_this_iter: 2
  episodes_total: 2
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.20000000298023224
          cur_lr: 4.999999873689376e-05
          entropy: 3.684633493423462
          entropy_coeff: 0.0
          kl: 0.02918299287557602
          model: {}
          policy_loss: -0.019425131380558014
          total_loss: 2359238957465600.0
          vf_explained_var: 1.6763806343078613e-08
          vf_loss: 2359238957465600.0
    num_agent_steps_sampled: 4000
    num_steps_sampled: 4000
    num_steps_trained: 4000
  iterations_since_restore: 1
  node_ip: 



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,2,52.165,8000,357020000.0,502214000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,1,25.6065,4000,334802000.0,412891000.0,256712000.0,1443


Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 8000
  custom_metrics: {}
  date: 2021-05-02_12-16-13
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 385036238.30124897
  episode_reward_min: 256712129.99431816
  episodes_this_iter: 2
  episodes_total: 4
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.30000001192092896
          cur_lr: 4.999999873689376e-05
          entropy: 3.6634135246276855
          entropy_coeff: 0.0
          kl: 0.02170611172914505
          model: {}
          policy_loss: -0.014797271229326725
          total_loss: 2316031049596928.0
          vf_explained_var: -3.725290298461914e-09
          vf_loss: 2316031049596928.0
    num_agent_steps_sampled: 8000
    num_steps_sampled: 8000
    num_steps_trained: 8000
  iterations_since_restore: 2
  node_ip: 



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-05-02_12-16-39
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 403985536.17394304
  episode_reward_min: 231369899.57460704
  episodes_this_iter: 4
  episodes_total: 8
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.654881715774536
          entropy_coeff: 0.0
          kl: 0.010116513818502426
          model: {}
          policy_loss: -0.014526057988405228
          total_loss: 2283011877896192.0
          vf_explained_var: -3.91155481338501e-08
          vf_loss: 2283011877896192.0
    num_agent_steps_sampled: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 3
  node_i

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,2,52.165,8000,357020000.0,502214000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,3,77.947,12000,403986000.0,609157000.0,231370000.0,1443




Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 12000
  custom_metrics: {}
  date: 2021-05-02_12-16-39
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 559245548.3228619
  episode_reward_mean: 393071917.1430422
  episode_reward_min: 194898485.92301768
  episodes_this_iter: 4
  episodes_total: 8
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.6284542083740234
          entropy_coeff: 0.0
          kl: 0.017089808359742165
          model: {}
          policy_loss: -0.024836424738168716
          total_loss: 2324262354419712.0
          vf_explained_var: 0.0
          vf_loss: 2324262354419712.0
    num_agent_steps_sampled: 12000
    num_steps_sampled: 12000
    num_steps_trained: 12000
  iterations_since_restore: 3
  node_ip: 10.0.0.163
  n



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-05-02_12-17-05
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 421830656.72802603
  episode_reward_min: 231369899.57460704
  episodes_this_iter: 2
  episodes_total: 10
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.6478357315063477
          entropy_coeff: 0.0
          kl: 0.01597202755510807
          model: {}
          policy_loss: -0.01879916712641716
          total_loss: 2366078961319936.0
          vf_explained_var: -1.862645149230957e-09
          vf_loss: 2366078961319936.0
    num_agent_steps_sampled: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 4
  node_

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,3,78.434,12000,393072000.0,559246000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,4,104.124,16000,421831000.0,609157000.0,231370000.0,1443




Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 16000
  custom_metrics: {}
  date: 2021-05-02_12-17-06
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 600251078.4893117
  episode_reward_mean: 393972490.1556667
  episode_reward_min: 194898485.92301768
  episodes_this_iter: 2
  episodes_total: 10
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.598478317260742
          entropy_coeff: 0.0
          kl: 0.017061933875083923
          model: {}
          policy_loss: -0.01707525923848152
          total_loss: 2478330079084544.0
          vf_explained_var: -2.421438694000244e-08
          vf_loss: 2478330079084544.0
    num_agent_steps_sampled: 16000
    num_steps_sampled: 16000
    num_steps_trained: 16000
  iterations_since_restore: 4
  node_



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-05-02_12-17-31
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 396789537.1144655
  episode_reward_min: 231369899.57460704
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.6427111625671387
          entropy_coeff: 0.0
          kl: 0.012298857793211937
          model: {}
          policy_loss: -0.015781324356794357
          total_loss: 2176560442376192.0
          vf_explained_var: -1.862645149230957e-09
          vf_loss: 2176560442376192.0
    num_agent_steps_sampled: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 5
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,4,104.887,16000,393972000.0,600251000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,5,130.278,20000,396790000.0,609157000.0,231370000.0,1443




Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 20000
  custom_metrics: {}
  date: 2021-05-02_12-17-32
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 600251078.4893117
  episode_reward_mean: 402149532.76221675
  episode_reward_min: 194898485.92301768
  episodes_this_iter: 2
  episodes_total: 12
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.563166618347168
          entropy_coeff: 0.0
          kl: 0.012666499242186546
          model: {}
          policy_loss: -0.00973757728934288
          total_loss: 2989363273138176.0
          vf_explained_var: -1.30385160446167e-08
          vf_loss: 2989363273138176.0
    num_agent_steps_sampled: 20000
    num_steps_sampled: 20000
    num_steps_trained: 20000
  iterations_since_restore: 5
  node_



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-05-02_12-17-59
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 387719213.93731457
  episode_reward_min: 194672376.14477342
  episodes_this_iter: 4
  episodes_total: 16
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.601344585418701
          entropy_coeff: 0.0
          kl: 0.016396688297390938
          model: {}
          policy_loss: -0.020912040024995804
          total_loss: 2030979170435072.0
          vf_explained_var: -1.4901161193847656e-08
          vf_loss: 2030979170435072.0
    num_agent_steps_sampled: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 6
  nod



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,5,131.222,20000,402150000.0,600251000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,6,157.734,24000,387719000.0,609157000.0,194672000.0,1443


Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 24000
  custom_metrics: {}
  date: 2021-05-02_12-18-00
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 600251078.4893117
  episode_reward_mean: 399596852.6591784
  episode_reward_min: 194898485.92301768
  episodes_this_iter: 4
  episodes_total: 16
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.551697254180908
          entropy_coeff: 0.0
          kl: 0.014328718185424805
          model: {}
          policy_loss: -0.0163491852581501
          total_loss: 1776786631294976.0
          vf_explained_var: 0.0
          vf_loss: 1776786631294976.0
    num_agent_steps_sampled: 24000
    num_steps_sampled: 24000
    num_steps_trained: 24000
  iterations_since_restore: 6
  node_ip: 10.0.0.163
  num



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-05-02_12-18-25
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 378781762.659733
  episode_reward_min: 194672376.14477342
  episodes_this_iter: 2
  episodes_total: 18
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.603041648864746
          entropy_coeff: 0.0
          kl: 0.012712201103568077
          model: {}
          policy_loss: -0.009032242000102997
          total_loss: 1931663151988736.0
          vf_explained_var: 1.862645149230957e-08
          vf_loss: 1931663151988736.0
    num_agent_steps_sampled: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 7
  node_ip



Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,6,158.805,24000,399597000.0,600251000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,7,184.051,28000,378782000.0,609157000.0,194672000.0,1443


Result for PPO_TradingEnv_906df_00000:
  agent_timesteps_total: 28000
  custom_metrics: {}
  date: 2021-05-02_12-18-26
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 600251078.4893117
  episode_reward_mean: 396940719.9564163
  episode_reward_min: 194898485.92301768
  episodes_this_iter: 2
  episodes_total: 18
  experiment_id: d6005b9d3c5b4991815f5966a199d98f
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.5235328674316406
          entropy_coeff: 0.0
          kl: 0.015590965747833252
          model: {}
          policy_loss: -0.018306730315089226
          total_loss: 2106611933904896.0
          vf_explained_var: 1.6763806343078613e-08
          vf_loss: 2106611933904896.0
    num_agent_steps_sampled: 28000
    num_steps_sampled: 28000
    num_steps_trained: 28000
  iterations_since_restore: 7
  nod



Result for PPO_TradingEnv_906df_00001:
  agent_timesteps_total: 32000
  custom_metrics: {}
  date: 2021-05-02_12-18-51
  done: false
  episode_len_mean: 1443.0
  episode_media: {}
  episode_reward_max: 609156966.664645
  episode_reward_mean: 384271786.5032428
  episode_reward_min: 194672376.14477342
  episodes_this_iter: 4
  episodes_total: 22
  experiment_id: ea22ce614f804a7ca53ef7eeb9224c05
  hostname: DESKTOP-3BVVEU8
  info:
    learner:
      default_policy:
        learner_stats:
          cur_kl_coeff: 0.44999998807907104
          cur_lr: 4.999999873689376e-05
          entropy: 3.5788302421569824
          entropy_coeff: 0.0
          kl: 0.013768007978796959
          model: {}
          policy_loss: -0.009039110504090786
          total_loss: 2789225581772800.0
          vf_explained_var: -5.587935447692871e-09
          vf_loss: 2789225581772800.0
    num_agent_steps_sampled: 32000
    num_steps_sampled: 32000
    num_steps_trained: 32000
  iterations_since_restore: 8
  node

Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,7,184.922,28000,396941000.0,600251000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,8,210.355,32000,384272000.0,609157000.0,194672000.0,1443




Trial name,status,loc,iter,total time (s),ts,reward,episode_reward_max,episode_reward_min,episode_len_mean
PPO_TradingEnv_906df_00000,RUNNING,10.0.0.163:22452,7,184.922,28000,396941000.0,600251000.0,194898000.0,1443
PPO_TradingEnv_906df_00001,RUNNING,10.0.0.163:20380,8,210.355,32000,384272000.0,609157000.0,194672000.0,1443


2021-05-02 12:18:52,144	ERROR tune.py:545 -- Trials did not complete: [PPO_TradingEnv_906df_00000, PPO_TradingEnv_906df_00001]
2021-05-02 12:18:52,144	INFO tune.py:549 -- Total run time: 223.32 seconds (223.06 seconds for the tuning loop).
