**This notebook will try to implement the tensboard plots, along with the hyperparameter optimisation**

In [1]:
import trading_gym
from trading_gym.registry.gaia.v9.env import GAIAPredictorsContinuousV9
from trading_gym.registry.gaia.v8.env import GAIAPredictorsContinuousV8
from datetime import datetime
from trading_gym.contracts import Cash
from collections import namedtuple
import json
import tensorflow as tf
import os
import pandas as pd
import ray
print(datetime.now())
print(trading_gym.__name__, trading_gym.__version__)
print(ray.__name__, ray.__version__)

2019-08-13 08:43:25.008890
trading_gym 0.8.1
ray 0.7.2


In [2]:
import gym
import numpy as np
import stable_baselines
from stable_baselines.ddpg.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines.ddpg.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise, AdaptiveParamNoiseSpec
from stable_baselines import DDPG

In [4]:
import os
os.getcwd()

'/home/Nicholas/trading-gym/notebooks/trading-gym/walk-forward'

In [5]:
import sys 
sys.path.insert(1,'/home/Nicholas/trading-gym/rl-baselines-zoo/utils')

In [6]:
import utils
# import 

In [7]:
from utils import make_env, ALGOS, linear_schedule, get_latest_run_id, get_wrapper_class
from hyperparams_opt import hyperparam_optimization

In [8]:
import numpy as np
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpLnLstmPolicy
import optuna

In [11]:
n_cpu = 4


def optimize_PPO(trial):
    """ Learning hyperparamters we want to optimise"""
    return {
        'n_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
        'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
        'ent_coef': trial.suggest_loguniform('ent_coef', 1e-8, 1e-1),
        'cliprange': trial.suggest_uniform('cliprange', 0.1, 0.4),
        'noptepochs': int(trial.suggest_loguniform('noptepochs', 1, 48)),
        'lam': trial.suggest_uniform('lam', 0.8, 1.)
    }

def optimize_DDPG(trial):
    """ Learning hyperparamters we want to optimise"""
    n_actions = 2
    return {
        # the ones we won't optimise 
        
        'action_noise': OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),\
                                                     sigma=float(0.5) * np.ones(n_actions)),
        'param_noise': None, # AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.1)

        # ...and the ones we will
        'nb_train_steps': int(trial.suggest_loguniform('n_steps', 16, 2048)),
        'gamma': trial.suggest_loguniform('gamma', 0.8, 0.9999),
        'actor_lr': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
        'critic_lr': trial.suggest_loguniform('learning_rate', 1e-5, 1.)
    }



def optimize_TD3(trial):
    """ Learning hyperparamters we want to optimise"""
    return {
        # the ones we won't optimise 
        
        
        # ...and the ones wee will
        'gamma': trial.suggest_loguniform('gamma', 0.9, 0.9999),
        'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1.),
        
    }
    

In [12]:
# three different specific hyperparams for the agents that we won't change or try to optimize 
td3_hyparam = dict()
td3_hyparam['policy'] = 'MlpPolicy'
td3_hyparam['tensorboard_log'] = "optuna_tensorboard/optuna_td3v9"

sac_hyparam = dict()
sac_hyparam['policy'] = 'MlpPolicy'
sac_hyparam['tensorboard_log'] = "optuna_tensorboard/optuna_sacv9"


ddpg_hyparam = dict()
ddpg_hyparam['policy'] = 'MlpPolicy'
ddpg_hyparam['tensorboard_log'] = "optuna_tensorboard/optuna_DDPGv9"

**IMPORTANT:
One needs to choose the agent to optimize**


In [13]:
# agent = 'sac'
agent = 'ddpg'
# agent = 'td3'

In [16]:
def optimize_agent(trial):
    
    """ Train the model and optimise
        Optuna maximises the negative log likelihood, so we
        need to negate the reward here
    """
    
    env_config = dict()
    env_config['folds'] =  {
        'training-set': [datetime.min, datetime(2008, 3, 18)],
        'test-set': [datetime(2008, 3, 19), datetime.max],
    }
    
    env = GAIAPredictorsContinuousV9(env_config)
    
    # implement a way of getting MlpLnLstmPolicy in (and other policies)
    if agent == 'sac':
        model_params = optimize_SAC(trial)
        env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
        model = SAC(sac_hyparam['policy'], env, verbose=0, **model_params)
        
        
    if agent == 'ddpg':
        model_params = optimize_DDPG(trial)
        env = DummyVecEnv([lambda: env])
        model = DDPG(ddpg_hyparam['policy'], env, verbose=0, **model_params)
        
        
    if agent == 'td3': 
        model_params = optimize_TD3(trial)
        env = DummyVecEnv([lambda: env])
        model = TD3(td3_hyparam['policy'], env, verbose=0, **model_params)
        
    # note, depending on the model/agent, it may or may not allow for multiple vectorized environments. 
#     env = SubprocVecEnv([lambda: env for i in range(n_cpu)])
    
    # now let us let the model learn 
    print("Model now learning")
    model.learn(10000)
    print("Model finished learning")
    
    rewards = []
    n_episodes, reward_sum = 0, 0.0
    
    print("Now some optimisation")
    obs = env.reset()
    
    # how many different episodes we want to run
    while n_episodes < 4:
        action, _ = model.predict(obs)
        obs, reward, done, _ = env.step(action)
        reward_sum += reward
        
        
        if done:
            rewards.append(reward_sum)
            reward_sum = 0.0
            n_episodes += 1
            obs = env.reset()
            print("Another episode done")

    last_reward = np.mean(rewards)
    trial.report(-1 * last_reward)
    
    print("Finished")
    print(last_reward)
    
    return -1 * last_reward

In [17]:
study = optuna.create_study(study_name='SAC_v9_optuna', load_if_exists=True)
# storage can go for now --  storage='sqlite:///params.db'
study.optimize(optimize_agent, n_trials=1000, n_jobs=4)

Model now learning
Model now learning
Model now learning
Model now learning
0.010627627


[I 2019-07-31 12:58:56,622] Finished trial#3 resulted in value: -0.0106276273727417. Current best value is -0.0106276273727417 with parameters: {'n_steps': 51.784331509268064, 'gamma': 0.8058968754328775, 'learning_rate': 0.13227274109917356}.


0.013625093


[I 2019-07-31 13:04:41,459] Finished trial#1 resulted in value: -0.013625092804431915. Current best value is -0.013625092804431915 with parameters: {'n_steps': 84.51897871253432, 'gamma': 0.9680524288547906, 'learning_rate': 3.116774334972532e-05}.


Model finished learning
Now some optimisation
Finished
0.0075587993


[I 2019-07-31 13:19:27,038] Finished trial#2 resulted in value: -0.007558799348771572. Current best value is -0.007558799348771572 with parameters: {'n_steps': 106.59185421926038, 'gamma': 0.9556208912146325, 'learning_rate': 0.0017190729815035376}.


Model now learning
0.009890923


[I 2019-07-31 13:41:17,707] Finished trial#4 resulted in value: -0.009890923276543617. Current best value is -0.013625092804431915 with parameters: {'n_steps': 84.51897871253432, 'gamma': 0.9680524288547906, 'learning_rate': 3.116774334972532e-05}.


Model finished learning
Now some optimisation
Finished
0.0021606819


[I 2019-07-31 13:42:10,069] Finished trial#1 resulted in value: -0.002160681877285242. Current best value is -0.007558799348771572 with parameters: {'n_steps': 106.59185421926038, 'gamma': 0.9556208912146325, 'learning_rate': 0.0017190729815035376}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.010025506


[I 2019-07-31 13:58:08,377] Finished trial#5 resulted in value: 0.010025505907833576. Current best value is -0.007558799348771572 with parameters: {'n_steps': 106.59185421926038, 'gamma': 0.9556208912146325, 'learning_rate': 0.0017190729815035376}.


Model now learning
0.011284121


[I 2019-07-31 14:04:16,804] Finished trial#5 resulted in value: -0.011284121312201023. Current best value is -0.013625092804431915 with parameters: {'n_steps': 84.51897871253432, 'gamma': 0.9680524288547906, 'learning_rate': 3.116774334972532e-05}.


0.015605375


[I 2019-07-31 14:12:58,836] Finished trial#2 resulted in value: -0.015605375170707703. Current best value is -0.015605375170707703 with parameters: {'n_steps': 454.292257323849, 'gamma': 0.8014623567760361, 'learning_rate': 0.07608891335675509}.


Model finished learning
Now some optimisation
Finished
0.014398493


[I 2019-07-31 14:35:34,524] Finished trial#6 resulted in value: -0.014398492872714996. Current best value is -0.014398492872714996 with parameters: {'n_steps': 205.10846898821129, 'gamma': 0.9513540303355392, 'learning_rate': 0.024938744617321807}.


Model now learning
-0.0019536298


[I 2019-07-31 14:51:29,961] Finished trial#7 resulted in value: 0.0019536297768354416. Current best value is -0.015605375170707703 with parameters: {'n_steps': 454.292257323849, 'gamma': 0.8014623567760361, 'learning_rate': 0.07608891335675509}.


Model finished learning
Now some optimisation
Finished
0.0031938595


[I 2019-07-31 14:56:47,015] Finished trial#7 resulted in value: -0.003193859476596117. Current best value is -0.014398492872714996 with parameters: {'n_steps': 205.10846898821129, 'gamma': 0.9513540303355392, 'learning_rate': 0.024938744617321807}.


Model now learning
0.0038130987


[I 2019-07-31 15:04:04,517] Finished trial#6 resulted in value: -0.0038130986504256725. Current best value is -0.015605375170707703 with parameters: {'n_steps': 454.292257323849, 'gamma': 0.8014623567760361, 'learning_rate': 0.07608891335675509}.


---------------------------------------
| reference_Q_mean        | -0.00409  |
| reference_Q_std         | 4.66e-10  |
| reference_action_mean   | -1        |
| reference_action_std    | 0         |
| reference_actor_Q_mean  | -0.00409  |
| reference_actor_Q_std   | 4.66e-10  |
| rollout/Q_mean          | -0.0218   |
| rollout/actions_mean    | -0.898    |
| rollout/actions_std     | 0.177     |
| rollout/episode_steps   | 41        |
| rollout/episodes        | 243       |
| rollout/return          | -0.000184 |
| rollout/return_history  | -0.00031  |
| total/duration          | 817       |
| total/episodes          | 243       |
| total/epochs            | 1         |
| total/steps             | 9999      |
| total/steps_per_second  | 12.2      |
| train/loss_actor        | 0.00761   |
| train/loss_critic       | 1.25e-05  |
| train/param_noise_di... | 0         |
---------------------------------------

Model finished learning
Now some optimisation
Finished
0.0


[I 2019-07-31 15:10:38,410] Finished trial#8 resulted in value: -0.0. Current best value is -0.014398492872714996 with parameters: {'n_steps': 205.10846898821129, 'gamma': 0.9513540303355392, 'learning_rate': 0.024938744617321807}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.007970166


[I 2019-07-31 15:20:42,962] Finished trial#9 resulted in value: 0.007970166392624378. Current best value is -0.014398492872714996 with parameters: {'n_steps': 205.10846898821129, 'gamma': 0.9513540303355392, 'learning_rate': 0.024938744617321807}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.025454141


[I 2019-07-31 15:30:37,234] Finished trial#0 resulted in value: -0.025454141199588776. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0063992897


[I 2019-07-31 15:55:45,431] Finished trial#10 resulted in value: -0.006399289704859257. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0026028154


[I 2019-07-31 16:02:04,876] Finished trial#12 resulted in value: -0.002602815395221114. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
0.00732785


[I 2019-07-31 16:09:42,942] Finished trial#0 resulted in value: -0.007327849976718426. Current best value is -0.015605375170707703 with parameters: {'n_steps': 454.292257323849, 'gamma': 0.8014623567760361, 'learning_rate': 0.07608891335675509}.


Model finished learning
Now some optimisation
Finished
0.002478499


[I 2019-07-31 16:30:17,866] Finished trial#4 resulted in value: -0.0024784989655017853. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00014639739


[I 2019-07-31 16:48:01,516] Finished trial#3 resulted in value: 0.00014639738947153091. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.017278183


[I 2019-07-31 17:31:35,593] Finished trial#11 resulted in value: -0.01727818325161934. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0023173061


[I 2019-07-31 17:40:46,153] Finished trial#13 resulted in value: -0.0023173061199486256. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0035207348


[I 2019-07-31 17:42:48,594] Finished trial#15 resulted in value: -0.003520734841004014. Current best value is -0.025454141199588776 with parameters: {'n_steps': 1023.8039301211427, 'gamma': 0.8792849887253174, 'learning_rate': 0.016986024402197635}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.026609816


[I 2019-07-31 18:27:18,424] Finished trial#16 resulted in value: -0.026609815657138824. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model finished learning
Now some optimisation
Model now learning
Finished
0.0070064673


[I 2019-07-31 18:27:34,038] Finished trial#17 resulted in value: -0.007006467320024967. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0070101945


[I 2019-07-31 18:32:09,438] Finished trial#14 resulted in value: -0.007010194472968578. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.010863065


[I 2019-07-31 18:34:40,488] Finished trial#18 resulted in value: -0.010863064788281918. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.006191739


[I 2019-07-31 19:09:26,334] Finished trial#19 resulted in value: -0.00619173888117075. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01320868


[I 2019-07-31 19:28:21,684] Finished trial#20 resulted in value: -0.013208679854869843. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-07-31 19:29:09,615] Finished trial#21 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.007806477


[I 2019-07-31 19:47:50,597] Finished trial#22 resulted in value: -0.007806477136909962. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.002558805


[I 2019-07-31 19:55:12,551] Finished trial#24 resulted in value: 0.0025588050484657288. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.008329334


[I 2019-07-31 20:16:31,320] Finished trial#26 resulted in value: 0.008329333737492561. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008417244


[I 2019-07-31 20:20:00,721] Finished trial#23 resulted in value: -0.00841724406927824. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00847396


[I 2019-07-31 20:26:09,217] Finished trial#29 resulted in value: -0.008473959751427174. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00029426673


[I 2019-07-31 20:49:46,067] Finished trial#25 resulted in value: -0.0002942667342722416. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.011758592


[I 2019-07-31 21:01:17,744] Finished trial#30 resulted in value: -0.01175859197974205. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.009053696


[I 2019-07-31 21:15:31,727] Finished trial#27 resulted in value: -0.009053695946931839. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.005937174


[I 2019-07-31 21:24:34,681] Finished trial#31 resulted in value: -0.005937173962593079. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-07-31 21:26:38,140] Finished trial#32 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0076346146


[I 2019-07-31 21:36:45,845] Finished trial#33 resulted in value: 0.00763461459428072. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.009582775


[I 2019-07-31 21:40:26,713] Finished trial#28 resulted in value: 0.009582774713635445. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0018031509


[I 2019-07-31 21:45:25,439] Finished trial#34 resulted in value: 0.0018031508661806583. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0082506


[I 2019-07-31 21:55:17,952] Finished trial#37 resulted in value: 0.008250599727034569. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0136313755


[I 2019-07-31 23:54:46,807] Finished trial#35 resulted in value: -0.013631375506520271. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01139902


[I 2019-07-31 23:55:25,822] Finished trial#38 resulted in value: -0.011399020440876484. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00018488552


[I 2019-07-31 23:59:36,416] Finished trial#40 resulted in value: 0.00018488551722839475. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.002799029


[I 2019-08-01 00:05:34,985] Finished trial#36 resulted in value: 0.0027990289963781834. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0012793238


[I 2019-08-01 00:10:30,712] Finished trial#42 resulted in value: 0.0012793238274753094. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0012823422


[I 2019-08-01 00:12:10,481] Finished trial#41 resulted in value: -0.0012823422439396381. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0067476537


[I 2019-08-01 00:15:00,294] Finished trial#43 resulted in value: 0.0067476537078619. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-2.3003275e-05


[I 2019-08-01 00:18:15,431] Finished trial#44 resulted in value: 2.3003274691291153e-05. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
3.383821e-05


[I 2019-08-01 00:21:19,053] Finished trial#39 resulted in value: -3.383820876479149e-05. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0024174983


[I 2019-08-01 00:22:14,958] Finished trial#46 resulted in value: 0.002417498268187046. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00783295


[I 2019-08-01 01:56:34,847] Finished trial#45 resulted in value: -0.007832949981093407. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.004921925


[I 2019-08-01 01:58:28,057] Finished trial#47 resulted in value: -0.004921924788504839. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 02:08:18,889] Finished trial#49 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00051356526


[I 2019-08-01 02:13:02,867] Finished trial#48 resulted in value: 0.0005135652609169483. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.009563416


[I 2019-08-01 02:21:14,038] Finished trial#52 resulted in value: -0.009563416242599487. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.021939851


[I 2019-08-01 02:25:58,856] Finished trial#53 resulted in value: -0.021939851343631744. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00043457176


[I 2019-08-01 02:35:11,005] Finished trial#51 resulted in value: -0.0004345717607066035. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0037698138


[I 2019-08-01 02:38:14,769] Finished trial#50 resulted in value: 0.0037698138039559126. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0006411772


[I 2019-08-01 03:04:55,296] Finished trial#54 resulted in value: -0.0006411771755665541. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008770286


[I 2019-08-01 03:09:07,576] Finished trial#55 resulted in value: -0.008770286105573177. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.006041741


[I 2019-08-01 03:18:14,460] Finished trial#56 resulted in value: 0.006041740998625755. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.016935272


[I 2019-08-01 03:33:26,176] Finished trial#59 resulted in value: -0.01693527214229107. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00016245479


[I 2019-08-01 03:47:19,176] Finished trial#57 resulted in value: -0.0001624547876417637. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 04:11:52,166] Finished trial#58 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.003573005


[I 2019-08-01 04:25:15,478] Finished trial#60 resulted in value: 0.003573005087673664. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.016485985


[I 2019-08-01 04:36:48,772] Finished trial#61 resulted in value: -0.01648598536849022. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00563691


[I 2019-08-01 04:50:01,085] Finished trial#62 resulted in value: -0.005636909976601601. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0019544899


[I 2019-08-01 04:55:37,157] Finished trial#64 resulted in value: 0.001954489853233099. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01762068


[I 2019-08-01 05:09:12,868] Finished trial#65 resulted in value: -0.01762068085372448. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.005975195


[I 2019-08-01 05:11:06,545] Finished trial#63 resulted in value: -0.0059751952067017555. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0063800286


[I 2019-08-01 05:15:21,299] Finished trial#68 resulted in value: -0.006380028557032347. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.014342093


[I 2019-08-01 05:23:24,422] Finished trial#69 resulted in value: -0.014342092908918858. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.008120161


[I 2019-08-01 05:27:09,665] Finished trial#70 resulted in value: 0.008120160549879074. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00088864565


[I 2019-08-01 05:32:29,769] Finished trial#71 resulted in value: -0.0008886456489562988. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0008439827


[I 2019-08-01 05:35:37,848] Finished trial#72 resulted in value: 0.0008439826779067516. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00056622


[I 2019-08-01 05:49:16,848] Finished trial#73 resulted in value: 0.0005662200273945928. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0051683974


[I 2019-08-01 05:49:34,858] Finished trial#67 resulted in value: -0.005168397445231676. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.005179936


[I 2019-08-01 05:54:34,550] Finished trial#76 resulted in value: -0.005179936066269875. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0071137464


[I 2019-08-01 05:56:16,598] Finished trial#74 resulted in value: -0.007113746367394924. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0106244655


[I 2019-08-01 06:03:55,868] Finished trial#75 resulted in value: -0.01062446553260088. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.019398332


[I 2019-08-01 06:21:35,006] Finished trial#66 resulted in value: 0.01939833164215088. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 06:27:12,263] Finished trial#79 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.013138011


[I 2019-08-01 06:37:21,931] Finished trial#81 resulted in value: 0.01313801109790802. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 07:24:18,210] Finished trial#77 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.014897322


[I 2019-08-01 07:26:18,609] Finished trial#78 resulted in value: -0.014897322282195091. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 08:14:04,954] Finished trial#84 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.019263187


[I 2019-08-01 08:32:02,687] Finished trial#80 resulted in value: -0.019263187423348427. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Model finished learning
Now some optimisation
Finished
0.0028804669


[I 2019-08-01 08:46:13,312] Finished trial#86 resulted in value: -0.0028804668691009283. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Finished
0.006386829


[I 2019-08-01 08:46:16,803] Finished trial#82 resulted in value: -0.006386829074472189. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model now learning
Model finished learning
Now some optimisation
Finished
-0.005935225


[I 2019-08-01 08:51:53,505] Finished trial#87 resulted in value: 0.005935225170105696. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 08:56:26,973] Finished trial#88 resulted in value: -0.0. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.009659126


[I 2019-08-01 09:03:52,732] Finished trial#90 resulted in value: 0.00965912640094757. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0035197507


[I 2019-08-01 09:17:20,407] Finished trial#89 resulted in value: 0.003519750665873289. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
2.1549185e-06


[I 2019-08-01 09:31:01,558] Finished trial#91 resulted in value: -2.1549185476033017e-06. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.011574521


[I 2019-08-01 09:35:06,214] Finished trial#83 resulted in value: -0.011574520729482174. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0047597964


[I 2019-08-01 09:52:35,633] Finished trial#93 resulted in value: 0.004759796429425478. Current best value is -0.026609815657138824 with parameters: {'n_steps': 719.4859693572113, 'gamma': 0.9121175921017415, 'learning_rate': 0.02947990731248775}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.029757723


[I 2019-08-01 10:13:45,957] Finished trial#94 resulted in value: -0.029757723212242126. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0026595006


[I 2019-08-01 10:27:59,679] Finished trial#92 resulted in value: 0.002659500576555729. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.015942354


[I 2019-08-01 10:30:31,818] Finished trial#85 resulted in value: -0.015942353755235672. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0075244093


[I 2019-08-01 10:49:56,303] Finished trial#96 resulted in value: 0.007524409331381321. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00071732176


[I 2019-08-01 11:05:49,298] Finished trial#97 resulted in value: -0.0007173217600211501. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00046938998


[I 2019-08-01 11:08:01,230] Finished trial#98 resulted in value: -0.0004693899827543646. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.003832862


[I 2019-08-01 11:12:26,858] Finished trial#95 resulted in value: -0.003832862013950944. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0093558915


[I 2019-08-01 11:23:37,938] Finished trial#100 resulted in value: -0.00935589149594307. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
8.538808e-05


[I 2019-08-01 11:28:55,499] Finished trial#99 resulted in value: -8.53880774229765e-05. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0053560883


[I 2019-08-01 11:42:43,285] Finished trial#102 resulted in value: -0.005356088280677795. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.004107968


[I 2019-08-01 11:54:16,239] Finished trial#103 resulted in value: -0.00410796795040369. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0050062137


[I 2019-08-01 11:55:42,048] Finished trial#101 resulted in value: 0.0050062136724591255. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0027956022


[I 2019-08-01 11:58:49,363] Finished trial#104 resulted in value: 0.0027956021949648857. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0066748383


[I 2019-08-01 12:37:23,604] Finished trial#106 resulted in value: 0.006674838252365589. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00043007138


[I 2019-08-01 13:11:51,932] Finished trial#105 resulted in value: 0.0004300713771954179. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.005899898


[I 2019-08-01 13:25:06,302] Finished trial#107 resulted in value: 0.005899897776544094. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01778442


[I 2019-08-01 13:25:15,778] Finished trial#108 resulted in value: -0.017784420400857925. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00211854


[I 2019-08-01 13:43:12,375] Finished trial#109 resulted in value: -0.002118539996445179. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.002430255


[I 2019-08-01 14:43:20,308] Finished trial#113 resulted in value: -0.0024302550591528416. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.021641402


[I 2019-08-01 14:57:30,331] Finished trial#110 resulted in value: -0.021641401574015617. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00019732269


[I 2019-08-01 15:16:18,367] Finished trial#111 resulted in value: -0.00019732268992811441. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.012913946


[I 2019-08-01 15:18:39,365] Finished trial#112 resulted in value: -0.012913946062326431. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0057638325


[I 2019-08-01 16:08:33,524] Finished trial#117 resulted in value: 0.005763832479715347. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00090183225


[I 2019-08-01 16:09:12,342] Finished trial#116 resulted in value: -0.0009018322452902794. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.000711231


[I 2019-08-01 16:43:08,845] Finished trial#114 resulted in value: -0.0007112310267984867. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00036491468


[I 2019-08-01 16:52:16,037] Finished trial#118 resulted in value: 0.00036491468199528754. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0009875374


[I 2019-08-01 17:09:38,406] Finished trial#121 resulted in value: -0.0009875374380499125. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008719671


[I 2019-08-01 17:17:21,073] Finished trial#115 resulted in value: -0.008719670586287975. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.009775341


[I 2019-08-01 17:25:10,647] Finished trial#120 resulted in value: -0.009775340557098389. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0033242735


[I 2019-08-01 17:40:40,722] Finished trial#123 resulted in value: -0.003324273508042097. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.013903872


[I 2019-08-01 17:49:15,833] Finished trial#124 resulted in value: -0.013903872109949589. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 18:03:32,674] Finished trial#125 resulted in value: -0.0. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.001541049


[I 2019-08-01 18:04:41,045] Finished trial#126 resulted in value: -0.0015410489868372679. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.011642741


[I 2019-08-01 18:37:58,127] Finished trial#127 resulted in value: -0.011642741039395332. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.022755552


[I 2019-08-01 18:40:17,145] Finished trial#119 resulted in value: -0.02275555208325386. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.010113859


[I 2019-08-01 19:05:38,778] Finished trial#129 resulted in value: -0.010113858617842197. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0026086953


[I 2019-08-01 19:07:14,258] Finished trial#130 resulted in value: -0.002608695300295949. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 19:28:34,022] Finished trial#128 resulted in value: -0.0. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00015047891


[I 2019-08-01 19:40:53,151] Finished trial#122 resulted in value: 0.00015047891065478325. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0008493331


[I 2019-08-01 20:13:51,557] Finished trial#131 resulted in value: -0.0008493331260979176. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0008581545


[I 2019-08-01 20:16:20,057] Finished trial#132 resulted in value: -0.0008581544971093535. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.005017956


[I 2019-08-01 20:26:49,560] Finished trial#135 resulted in value: -0.0050179557874798775. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0032060568


[I 2019-08-01 20:34:30,961] Finished trial#133 resulted in value: 0.0032060567755252123. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.009278161


[I 2019-08-01 20:38:14,571] Finished trial#136 resulted in value: -0.009278161451220512. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0013969961


[I 2019-08-01 20:48:07,616] Finished trial#137 resulted in value: -0.0013969960855320096. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 20:49:22,411] Finished trial#134 resulted in value: -0.0. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
1.4078735e-05


[I 2019-08-01 21:05:09,985] Finished trial#141 resulted in value: -1.4078735148359556e-05. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008524984


[I 2019-08-01 21:34:04,731] Finished trial#140 resulted in value: -0.008524984121322632. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.016602203


[I 2019-08-01 21:34:29,399] Finished trial#138 resulted in value: -0.016602203249931335. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.005631943


[I 2019-08-01 21:37:50,411] Finished trial#139 resulted in value: 0.005631942767649889. Current best value is -0.029757723212242126 with parameters: {'n_steps': 496.7369338766725, 'gamma': 0.97797986515614, 'learning_rate': 0.0042812561845731465}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.03037211


[I 2019-08-01 21:56:27,831] Finished trial#145 resulted in value: -0.03037210926413536. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.014712786


[I 2019-08-01 22:05:39,501] Finished trial#144 resulted in value: -0.014712786301970482. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-01 22:05:49,273] Finished trial#142 resulted in value: -0.0. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model now learning
Model finished learning
Now some optimisation
Finished
0.010125623


[I 2019-08-01 22:13:03,813] Finished trial#147 resulted in value: -0.01012562308460474. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.019440467


[I 2019-08-01 22:15:48,049] Finished trial#146 resulted in value: -0.019440466538071632. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.023277584


[I 2019-08-01 22:24:27,389] Finished trial#149 resulted in value: -0.023277584463357925. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.016456971


[I 2019-08-01 22:25:26,955] Finished trial#148 resulted in value: -0.01645697094500065. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.020323304


[I 2019-08-01 22:30:17,748] Finished trial#150 resulted in value: -0.02032330445945263. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.01207666


[I 2019-08-01 22:35:17,279] Finished trial#143 resulted in value: 0.012076660059392452. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0045383754


[I 2019-08-01 22:39:45,559] Finished trial#152 resulted in value: -0.004538375418633223. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01596862


[I 2019-08-01 22:44:36,308] Finished trial#154 resulted in value: -0.015968620777130127. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.015486183


[I 2019-08-01 22:55:36,166] Finished trial#156 resulted in value: -0.015486182644963264. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.02386905


[I 2019-08-01 22:58:46,728] Finished trial#151 resulted in value: -0.023869050666689873. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.023179665


[I 2019-08-01 23:05:03,286] Finished trial#153 resulted in value: -0.023179665207862854. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.011078446


[I 2019-08-01 23:30:10,345] Finished trial#157 resulted in value: -0.011078446172177792. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01557886


[I 2019-08-01 23:39:11,104] Finished trial#158 resulted in value: -0.0155788604170084. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0101628695


[I 2019-08-01 23:44:51,327] Finished trial#159 resulted in value: -0.010162869468331337. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0111073535


[I 2019-08-01 23:50:33,454] Finished trial#162 resulted in value: -0.011107353493571281. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0046458747


[I 2019-08-02 00:05:21,086] Finished trial#155 resulted in value: -0.004645874723792076. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.009330979


[I 2019-08-02 00:21:08,612] Finished trial#164 resulted in value: -0.009330978617072105. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0018964016


[I 2019-08-02 00:50:44,965] Finished trial#160 resulted in value: -0.001896401634439826. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.003991373


[I 2019-08-02 01:00:19,439] Finished trial#161 resulted in value: -0.003991372883319855. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-02 01:05:00,580] Finished trial#165 resulted in value: -0.0. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-02 01:10:59,021] Finished trial#163 resulted in value: -0.0. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-02 01:36:33,426] Finished trial#166 resulted in value: -0.0. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0027773285


[I 2019-08-02 01:38:46,498] Finished trial#169 resulted in value: -0.0027773284818977118. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0004940011


[I 2019-08-02 01:43:53,004] Finished trial#171 resulted in value: -0.0004940010840073228. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0045597167


[I 2019-08-02 01:49:28,174] Finished trial#167 resulted in value: -0.004559716675430536. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.010544248


[I 2019-08-02 01:55:24,338] Finished trial#168 resulted in value: -0.010544247925281525. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0016344602


[I 2019-08-02 02:02:12,725] Finished trial#174 resulted in value: 0.001634460175409913. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.016742267


[I 2019-08-02 02:05:23,639] Finished trial#170 resulted in value: -0.016742266714572906. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0058857705


[I 2019-08-02 02:16:32,031] Finished trial#176 resulted in value: -0.005885770544409752. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.02287587


[I 2019-08-02 02:35:54,254] Finished trial#173 resulted in value: -0.022875869646668434. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00022069982


[I 2019-08-02 03:16:09,451] Finished trial#172 resulted in value: -0.00022069981787353754. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.027887885


[I 2019-08-02 03:19:06,698] Finished trial#175 resulted in value: -0.02788788452744484. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008645497


[I 2019-08-02 03:29:53,732] Finished trial#177 resulted in value: -0.008645497262477875. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.002933984


[I 2019-08-02 03:48:08,685] Finished trial#181 resulted in value: -0.002933983923867345. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0064981985


[I 2019-08-02 03:58:04,068] Finished trial#178 resulted in value: 0.006498198490589857. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0


[I 2019-08-02 04:07:10,628] Finished trial#182 resulted in value: -0.0. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.006708734


[I 2019-08-02 04:14:09,173] Finished trial#183 resulted in value: -0.006708734203130007. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0049108965


[I 2019-08-02 04:22:38,387] Finished trial#184 resulted in value: -0.00491089653223753. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.00011066464


[I 2019-08-02 04:32:17,226] Finished trial#179 resulted in value: -0.00011066463775932789. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.00016761757


[I 2019-08-02 04:33:34,612] Finished trial#180 resulted in value: 0.0001676175743341446. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0070124


[I 2019-08-02 04:39:43,212] Finished trial#187 resulted in value: -0.007012399844825268. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0054241


[I 2019-08-02 04:40:58,030] Finished trial#188 resulted in value: 0.00542409997433424. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0022592081


[I 2019-08-02 04:42:27,725] Finished trial#189 resulted in value: 0.002259208122268319. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.008268801


[I 2019-08-02 04:45:33,837] Finished trial#185 resulted in value: -0.008268800564110279. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0010829993


[I 2019-08-02 04:54:02,028] Finished trial#190 resulted in value: -0.001082999282516539. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0073719528


[I 2019-08-02 04:57:59,718] Finished trial#192 resulted in value: 0.007371952757239342. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0071268743


[I 2019-08-02 06:12:56,869] Finished trial#186 resulted in value: -0.007126874290406704. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.010535094


[I 2019-08-02 06:29:07,306] Finished trial#193 resulted in value: -0.010535093955695629. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-8.930266e-05


[I 2019-08-02 06:33:23,189] Finished trial#191 resulted in value: 8.9302659034729e-05. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.012210578


[I 2019-08-02 06:35:35,650] Finished trial#194 resulted in value: 0.012210577726364136. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.005714955


[I 2019-08-02 06:38:11,127] Finished trial#196 resulted in value: -0.005714954808354378. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.006772645


[I 2019-08-02 06:56:30,117] Finished trial#195 resulted in value: 0.006772644817829132. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.0059860954


[I 2019-08-02 06:58:55,563] Finished trial#197 resulted in value: -0.005986095406115055. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.014255935


[I 2019-08-02 07:10:10,122] Finished trial#200 resulted in value: -0.014255935326218605. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0028958623


[I 2019-08-02 07:21:32,483] Finished trial#199 resulted in value: 0.002895862329751253. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.001666002


[I 2019-08-02 07:28:29,861] Finished trial#198 resulted in value: 0.0016660019755363464. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.01135381


[I 2019-08-02 07:32:14,940] Finished trial#201 resulted in value: -0.01135381031781435. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.011123394


[I 2019-08-02 07:34:12,177] Finished trial#202 resulted in value: -0.011123393662273884. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0024891635


[I 2019-08-02 07:39:37,948] Finished trial#203 resulted in value: 0.002489163540303707. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
-0.0021117437


[I 2019-08-02 07:52:53,303] Finished trial#205 resulted in value: 0.0021117436699569225. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning
Model finished learning
Now some optimisation
Finished
0.010931166


[I 2019-08-02 07:53:25,828] Finished trial#204 resulted in value: -0.010931165888905525. Current best value is -0.03037210926413536 with parameters: {'n_steps': 218.7153107041124, 'gamma': 0.9501558596684897, 'learning_rate': 0.023345300113504476}.


Model now learning


KeyboardInterrupt: 

In [72]:
env_config = dict()
env_config['folds'] =  {
    'training-set': [datetime.min, datetime(2008, 3, 18)],
    'test-set': [datetime(2008, 3, 19), datetime.max],
}
env = GAIAPredictorsContinuousV9(env_config)

**One should only have to change this cell, and the rest of the experiments will then run from there**

In [73]:
optim_hparam = dict()

optim_hparam['env_id'] = env
# env = DummyVecEnv([lambda: optim_hparam['env_id']])
# env = DummyVecEnv([lambda: env])

optim_hparam['tb_log_dir'] = 'path' #path to the tensoarbod logs

optim_hparam['RL_algo'] = 'ddpg'   #the algorithm one wants to use

optim_hparam['n_trials'] = 100  # number of trials for optimizing hyperparameters 

optim_hparam['n_parallel_jobs'] = 2 # Number of parallel jobs when optimizing hyperparameters

optim_hparam['sampler'] = 'tpe'# Sampler to use when optimizing hyperparameters -- choices['random','tpe']

optim_hparam['pruner'] = 'halving' # Pruner to use when optimizing hyperparamters -- 
# choices['halving','median','none']

optim_hparam['verbose'] = 1 # 0 none, 1 INFO

optim_hparam['log_folder'] = 'path'

optim_hparam['pretrained'] = '' # path to a pretrained agent, if want to continue training

**Now for the actual optimisation**

In [53]:
tensorboard_log = None if optim_hparam['tb_log_dir'] == '' else os.path.join(optim_hparam['tb_log_dir']\
                                                                         ,optim_hparam['RL_algo'] + '_'\
                                                                        ,str(optim_hparam['env_id']))

In [54]:
hyperparams = dict()

n_actions = env.action_space.shape[-1]
hyperparams['noise_std]'] = [float(0.5),float(1.0)]
hyperparams['action_noise'] = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),\
                                                           sigma=hyperparams['noise_std]'] * np.ones(n_actions))

hyperparams['param_noise'] = None
# AdaptiveParamNoiseSpec(initial_stddev=noise_std,desired_action_stddev=noise_std)

hyperparams['n_timesteps'] = 500000

hyperparams['n_envs'] = 1
# hyperparams['env_wrapper'] = 'GAIAPredictorsContinuousV9' # need to establish my envs wrapper here

# env_wrapper = get_wrapper_class(hyperparams)

In [69]:
def create_env(n_envs): 
    global hyperparams
    
    # look into the wrapper_class documentation 
    env = DummyVecEnv([make_env(optim_hparam['env_id'])])
    env = VecNormalize(env)

    
    return env 

In [65]:
def create_model(*_args,**kwargs):
    '''
    Cretae a model with different hyperparams
    '''
    verbose = 0 
    return optim_hparam['RL_algo'](env=env, tensorboard_log = tensorboard_log,
                                  verbose = verbose, **kwargs)

In [74]:
print("Optimizing hyperparameters")

data_frame = hyperparam_optimization(optim_hparam['RL_algo'],create_model,optim_hparam['env_id'],
                                    n_trials = optim_hparam['n_trials'], 
                                    hyperparams=hyperparams,
                                    n_jobs = optim_hparam['n_parallel_jobs'], 
                                    sampler_method = optim_hparam['sampler'], 
                                    pruner_method = optim_hparam['pruner'], 
                                    verbose = optim_hparam['verbose'])

report_name = "report_{}_{}-trials-{}-{}-{}.csv".format(optim_hparam['env_id'], \
                                                        optim_hparam['n_trials'],\
                                                        optim_hparam['sampler'], \
                                                        optim_hparam['pruner'])

log_path = os.path.join(optim_hparam['log_folder'], optim_hparam['RL_algo'], report_name)

print("Writing report to {}".format(log_path))

os.makedirs(os.path.dirname(log_path), exist_ok=True)
data_frame.to_csv(log_path)
print("Optimization complete")

Optimizing hyperparameters
Sampler: tpe - Pruner: halving


[W 2019-07-30 23:46:53,388] Setting status of trial#0 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:46:53,392] Setting status of trial#1 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_ac

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:00,667] Setting status of trial#15 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:01,666] Setting status of trial#16 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:08,695] Setting status of trial#30 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:08,696] Setting status of trial#31 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:15,726] Setting status of trial#45 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:16,724] Setting status of trial#46 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:23,735] Setting status of trial#60 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:24,129] Setting status of trial#61 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:31,210] Setting status of trial#75 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:31,210] Setting status of trial#76 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:38,215] Setting status of trial#90 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zoo/utils/hyperparams_opt.py", line 76, in objective
    trial.n_actions = env_fn(n_envs=1).action_space.shape[0]
TypeError: 'GAIAPredictorsContinuousV9' object is not callable
[W 2019-07-30 23:47:39,215] Setting status of trial#91 as TrialState.FAIL because of the following error: TypeError("'GAIAPredictorsContinuousV9' object is not callable",)
Traceback (most recent call last):
  File "/home/Nicholas/.venv/lib/python3.6/site-packages/optuna/study.py", line 468, in _run_trial
    result = func(trial)
  File "/home/Nicholas/trading-gym/rl-baselines-zo

Number of finished trials:  100
Best trial:


ValueError: No trials are completed yet.

In [None]:
# first give it a go with two folds. 




n_actions = env.action_space.shape[-1]
param_noise = None
# param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.1, desired_action_stddev=0.1)
t_steps = 500000

action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

#  if you use param_noise, need to use LnMlpPolicy instead 
model = DDPG(MlpPolicy, env, verbose=0, param_noise=param_noise, action_noise=action_noise)
model.learn(total_timesteps=t_steps)
t_steps = '500k'
model.save("saved_models/2folds_tuning/DDPG/DDPG_v9_steps={}".format(t_steps))

# del model # remove to demonstrate saving and loading

Then reload the agent and  

In [None]:
episode = env.sample_episode(
        fold = 'test-set',
        stable_model=True,
        model=model,
        episode_length=None,
        benchmark=env._load_benchmark().squeeze(),
        risk_free=env._load_risk_free().squeeze(),
        burn=1,
    )
episode.renderer.plotly_report()
episode.renderer.tearsheet()