In [1]:
import os
import pathlib
import numpy as np
import click
import json
import torch

#from rlkit.envs import ENVS
from rlkit.envs.wrappers import NormalizedBoxEnv
from rlkit.torch.sac.policies import TanhGaussianPolicy
from rlkit.torch.networks import FlattenMlp, MlpEncoder, RecurrentEncoder
from rlkit.torch.sac.sac import PEARLSoftActorCritic
from rlkit.torch.sac.agent import PEARLAgent
from rlkit.launchers.launcher_util import setup_logger
import rlkit.torch.pytorch_util as ptu
from configs.default import default_config

In [2]:
# default PEARL experiment settings
# all experiments should modify these settings only as needed
default_config = dict(
    env_name='hjgzuighi',
    n_train_tasks=2,
    n_eval_tasks=2,
    latent_size=2, # dimension of the latent context vector
    net_size=10, # number of units per FC layer in each network
    path_to_weights=None, # path to pre-trained weights to load into networks
    env_params=dict(
        n_tasks=2, # number of distinct tasks in this domain, shoudl equal sum of train and eval tasks
        randomize_tasks=True, # shuffle the tasks after creating them
    ),
    algo_params=dict(
        meta_batch=16, # number of tasks to average the gradient across
        num_iterations=500, # number of data sampling / training iterates
        num_initial_steps=2000, # number of transitions collected per task before training
        num_tasks_sample=5, # number of randomly sampled tasks to collect data for each iteration
        num_steps_prior=400, # number of transitions to collect per task with z ~ prior
        num_steps_posterior=0, # number of transitions to collect per task with z ~ posterior
        num_extra_rl_steps_posterior=400, # number of additional transitions to collect per task with z ~ posterior that are only used to train the policy and NOT the encoder
        num_train_steps_per_itr=2000, # number of meta-gradient steps taken per iteration
        num_evals=2, # number of independent evals
        num_steps_per_eval=600,  # nuumber of transitions to eval on
        batch_size=256, # number of transitions in the RL batch
        embedding_batch_size=64, # number of transitions in the context batch
        embedding_mini_batch_size=64, # number of context transitions to backprop through (should equal the arg above except in the recurrent encoder case)
        max_path_length=200, # max path length for this environment
        discount=0.99, # RL discount factor
        soft_target_tau=0.005, # for SAC target network update
        policy_lr=3E-4,
        qf_lr=3E-4,
        vf_lr=3E-4,
        context_lr=3e-4,
        reward_scale=5., # scale rewards before constructing Bellman update, effectively controls weight on the entropy of the policy
        sparse_rewards=False, # whether to sparsify rewards as determined in env
        kl_lambda=.1, # weight on KL divergence term in encoder loss
        use_information_bottleneck=True, # False makes latent context deterministic
        use_next_obs_in_context=False, # use next obs if it is useful in distinguishing tasks
        update_post_train=1, # how often to resample the context when collecting data during training (in trajectories)
        num_exp_traj_eval=1, # how many exploration trajs to collect before beginning posterior sampling at test time
        recurrent=False, # recurrent or permutation-invariant encoder
        dump_eval_paths=False, # whether to save evaluation trajectories
    ),
    util_params=dict(
        base_log_dir='output',
        use_gpu=False,
        gpu_id=0,
        debug=False, # debugging triggers printing and writes logs to debug directory
        docker=False, # TODO docker is not yet supported
    )
)


In [3]:
class space():
    flat_dim = 3
    high = 0.1
    low = -0.1
    pass

In [4]:
class task():
    
    def __init__(self):
        #self.observation_space = {"low":-np.inf, "high":np.inf,'flat_dim':2}
        self.observation_space = space()
        print(self.observation_space.flat_dim)
        
        self.action_space = space()
        #self._goal = [-1,1,0] #needed ? correct?
    def reset_task(self, idx):
        pass
    
    def reset(self):
        return np.asarray([0,1,2])
    
    def step(self, a):
        print('ACTIOIN:', a)
        
        
        forward_reward = 10-np.sum(np.abs(a-[-1,1,0]))
        ctrl_cost = 0. # .5 * np.square(a).sum()
        contact_cost = 0.
        survive_reward = 0. # 1.0
        
        
        done = False #will it ever terminate tho?
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        print('reward:', reward)
        return np.asarray([0,1,2]), reward, done, dict(
            reward_forward=forward_reward,
            reward_ctrl=-ctrl_cost,
            reward_contact=-contact_cost,
            reward_survive=survive_reward,
            torso_velocity=forward_reward,
        )

In [5]:

def experiment(variant):

    # create multi-task environment and sample tasks
    
    TAKS = task()
    
    env = TAKS#'yolo'#NormalizedBoxEnv(ENVS[variant['env_name']](**variant['env_params']))
    tasks = [0,1,2]
    obs_dim = int(3)
    action_dim = int(3)
    reward_dim = 1

    # instantiate networks
    latent_dim = variant['latent_size']
    context_encoder_input_dim = 2 * obs_dim + action_dim + reward_dim if variant['algo_params']['use_next_obs_in_context'] else obs_dim + action_dim + reward_dim
    context_encoder_output_dim = latent_dim * 2 if variant['algo_params']['use_information_bottleneck'] else latent_dim
    net_size = variant['net_size']
    recurrent = variant['algo_params']['recurrent']
    encoder_model = RecurrentEncoder if recurrent else MlpEncoder

    context_encoder = encoder_model(
        hidden_sizes=[200, 200, 200],
        input_size=context_encoder_input_dim,
        output_size=context_encoder_output_dim,
    )
    qf1 = FlattenMlp(
        hidden_sizes=[net_size, net_size, net_size],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=1,
    )
    qf2 = FlattenMlp(
        hidden_sizes=[net_size, net_size, net_size],
        input_size=obs_dim + action_dim + latent_dim,
        output_size=1,
    )
    vf = FlattenMlp(
        hidden_sizes=[net_size, net_size, net_size],
        input_size=obs_dim + latent_dim,
        output_size=1,
    )
    policy = TanhGaussianPolicy(
        hidden_sizes=[net_size, net_size, net_size],
        obs_dim=obs_dim + latent_dim,
        latent_dim=latent_dim,
        action_dim=action_dim,
    )
    agent = PEARLAgent(
        latent_dim,
        context_encoder,
        policy,
        **variant['algo_params']
    )
    algorithm = PEARLSoftActorCritic(
        env=env,
        train_tasks=list(tasks[:variant['n_train_tasks']]),
        eval_tasks=list(tasks[-variant['n_eval_tasks']:]),
        nets=[agent, qf1, qf2, vf],
        latent_dim=latent_dim,
        **variant['algo_params']
    )
    print('herefutff')
    # optionally load pre-trained weights
    if variant['path_to_weights'] is not None:
        path = variant['path_to_weights']
        context_encoder.load_state_dict(torch.load(os.path.join(path, 'context_encoder.pth')))
        qf1.load_state_dict(torch.load(os.path.join(path, 'qf1.pth')))
        qf2.load_state_dict(torch.load(os.path.join(path, 'qf2.pth')))
        vf.load_state_dict(torch.load(os.path.join(path, 'vf.pth')))
        # TODO hacky, revisit after model refactor
        algorithm.networks[-2].load_state_dict(torch.load(os.path.join(path, 'target_vf.pth')))
        policy.load_state_dict(torch.load(os.path.join(path, 'policy.pth')))
    print('zhtgdtseru')
    # optional GPU mode
    #ptu.set_gpu_mode(variant['util_params']['use_gpu'], variant['util_params']['gpu_id'])
    #if ptu.gpu_enabled():
        #algorithm.to()

    # debugging triggers a lot of printing and logs to a debug directory
    DEBUG = variant['util_params']['debug']
    os.environ['DEBUG'] = str(int(DEBUG))
    print('löllll')

    # create logging directory
    # TODO support Docker
##############    
    
    exp_id = 'debug' if DEBUG else None
    experiment_log_dir = setup_logger(variant['env_name'], variant=variant, exp_id=exp_id, base_log_dir=variant['util_params']['base_log_dir'])
    
##############    
    
    print('hgdftzrd')
    # optionally save eval trajectories as pkl files
    if variant['algo_params']['dump_eval_paths']:
        pickle_dir = experiment_log_dir + '/eval_trajectories'
        pathlib.Path(pickle_dir).mkdir(parents=True, exist_ok=True)

    # run the algorithm
    print('daa')
    algorithm.train()

def deep_update_dict(fr, to):
    ''' update dict of dicts with new values '''
    # assume dicts have same keys
    for k, v in fr.items():
        if type(v) is dict:
            deep_update_dict(v, to[k])
        else:
            to[k] = v
    return to

def main():

    variant = default_config

    experiment(variant)

main()

3
herefutff
zhtgdtseru
löllll
2020-12-01 23:46:50.526266 CET | Variant:
2020-12-01 23:46:50.526836 CET | {
  "env_name": "hjgzuighi",
  "n_train_tasks": 2,
  "n_eval_tasks": 2,
  "latent_size": 2,
  "net_size": 10,
  "path_to_weights": null,
  "env_params": {
    "n_tasks": 2,
    "randomize_tasks": true
  },
  "algo_params": {
    "meta_batch": 16,
    "num_iterations": 500,
    "num_initial_steps": 2000,
    "num_tasks_sample": 5,
    "num_steps_prior": 400,
    "num_steps_posterior": 0,
    "num_extra_rl_steps_posterior": 400,
    "num_train_steps_per_itr": 2000,
    "num_evals": 2,
    "num_steps_per_eval": 600,
    "batch_size": 256,
    "embedding_batch_size": 64,
    "embedding_mini_batch_size": 64,
    "max_path_length": 200,
    "discount": 0.99,
    "soft_target_tau": 0.005,
    "policy_lr": 0.0003,
    "qf_lr": 0.0003,
    "vf_lr": 0.0003,
    "context_lr": 0.0003,
    "reward_scale": 5.0,
    "sparse_rewards": false,
    "kl_lambda": 0.1,
    "use_information_bottleneck": t

ACTIOIN: [-0.37542653 -0.7710982   0.16903244]
reward: 7.435295894742012
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.37245     0.30779788 -0.11989341]
reward: 8.560354463756084
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.63687354 -0.8395117   0.73969   ]
reward: 5.783924758434296
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7719075 -0.3680306 -0.6707869]
reward: 6.189274966716766
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.93309313  0.32549223  0.18952179]
reward: 9.069063574075699
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.03292174 -0.16810371 -0.27402246]
reward: 7.590795565396547
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.2031387  -0.46474987 -0.03919432]
reward: 7.699194498360157
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5011103  -0.89874685  0.367163  ]
reward: 6.232979834079742
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.13749234 0.92557335 0.40180907]
reward: 8.386271938681602
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.846207    0.58254915  0.985835

ACTIOIN: [-0.42107937  0.42436704 -0.65512836]
reward: 8.190318048000336
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9737618 -0.5093582 -0.6931495]
reward: 5.82373046875
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7899817  -0.48869136  0.6059908 ]
reward: 7.695299535989761
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.83704674 -0.7650888   0.30426762]
reward: 7.76769033074379
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.3572793  0.79547656 0.42917067]
reward: 8.00902658700943
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.2932716  -0.96558636  0.7595838 ]
reward: 5.981558263301849
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6373267  -0.3117684   0.90818524]
reward: 6.142719626426697
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.69495314 -0.57541895 -0.6184236 ]
reward: 7.5011106133461
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.11806749 -0.5006669  -0.83802164]
reward: 6.7793789356946945
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.23234025 -0.1362147  -0.41788206]
rew

ACTIOIN: [-0.4444314 -0.4039128  0.0145671]
reward: 8.025951483286917
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.48790303  0.9171947  -0.9701504 ]
reward: 7.459141284227371
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.13315327 -0.48371983 -0.36990446]
reward: 7.279528990387917
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.24006276 -0.49555695  0.70201623]
reward: 7.042489573359489
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8715101  0.7712866  0.8964784]
reward: 8.746318280696869
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.08701189 -0.5395558   0.97655505]
reward: 6.570901051163673
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.77097094  0.91139746  0.9755895 ]
reward: 8.706778883934021
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5813843  -0.9833389  -0.84602576]
reward: 5.5892510414123535
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.1345705   0.69338834  0.94509685]
reward: 7.882861986756325
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7603047   0.26388553  0.90241

ACTIOIN: [-0.11970051 -0.13322362 -0.12971604]
reward: 7.85676085203886
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.15342116 -0.09942313  0.79689634]
reward: 6.950259372591972
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7366942   0.82946205  0.49575722]
reward: 9.070399045944214
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.045881   0.342699   0.00180525]
reward: 8.295012741233222
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.1003516  -0.50443906  0.9657478 ]
reward: 6.429461568593979
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.24700226  0.07334693 -0.9775148 ]
reward: 7.342834383249283
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.17439798 -0.13257982  0.75436133]
reward: 6.938660874962807
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.16508579 -0.49424508 -0.89095104]
reward: 6.779889672994614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.68959063 0.9802755  0.17749257]
reward: 8.113192304968834
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6619932  -0.31847134  0.0806919

ACTIOIN: [ 0.2608194  -0.9278193  -0.33557454]
reward: 6.475786745548248
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.657495   -0.20330419 -0.30489734]
reward: 8.14929349720478
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-2.1092438e-04 -8.1163526e-01  6.8979472e-02]
reward: 7.119596196644125
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7076327   0.97532684 -0.44787455]
reward: 9.235085010528564
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7849876  -0.44416377 -0.32581386]
reward: 8.015009999275208
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.42152947 0.32870394 0.06279507]
reward: 7.844379402697086
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8836446   0.23573221 -0.6256855 ]
reward: 8.4936912804842
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.01478914 -0.0895518  -0.4403954 ]
reward: 7.455263648182154
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7614705   0.14816399 -0.4847868 ]
reward: 8.424847677350044
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.18793204  0.30231413 

ACTIOIN: [ 0.63511527 -0.2826792  -0.8642009 ]
reward: 6.218004643917084
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.09263244 -0.0438709  -0.35084593]
reward: 7.697915609925985
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.747013  0.9600016 0.4065562]
reward: 7.8064324259758
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.83069277  0.69287384  0.47604117]
reward: 9.047525435686111
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.7866082  0.9184633  0.08752836]
reward: 8.044326707720757
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7243919  0.9159656  0.4346695]
reward: 9.205687999725342
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.39141208  0.95415586 -0.65503085]
reward: 8.690537095069885
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6070999   0.86692244 -0.01110323]
reward: 8.248719315044582
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7272225  -0.40136194 -0.85013425]
reward: 6.02128130197525
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.607762   0.96927977 0.65049386]
reward: 

ACTIOIN: [0.66216445 0.68064046 0.91027665]
reward: 7.10819935798645
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.93640625 -0.87827975  0.05571693]
reward: 8.00240957736969
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5084228   0.02154599  0.32561037]
reward: 8.204358410090208
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.172573   -0.7714964   0.62033486]
reward: 6.435595721006393
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.55143255  0.40226197  0.067154  ]
reward: 8.886540524661541
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.77280605 -0.05124727  0.14691082]
reward: 8.574647959321737
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.15387444 -0.7152448  -0.31687775]
reward: 7.121751859784126
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.48441106  0.8830261  -0.2661432 ]
reward: 8.132471859455109
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.11049409 -0.04301347 -0.40070534]
reward: 7.666775286197662
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5723226  -0.04612134  0.96686

ACTIOIN: [-0.1369509   0.67247766 -0.30638272]
reward: 8.503045842051506
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8140142 -0.9439162  0.5498496]
reward: 5.692219972610474
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.70491564 0.35107866 0.7664299 ]
reward: 6.879733115434647
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.12759376  0.98021024 -0.79140896]
reward: 8.06120753288269
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.44463232  0.96468467  0.01517183]
reward: 9.394145156256855
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.72053665 0.90688294 0.8972456 ]
reward: 7.289100706577301
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.32737392 -0.77827686 -0.31802392]
reward: 7.2310731410980225
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.6001089  0.06362116 0.40597647]
reward: 7.057535767555237
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7926493   0.04910326 -0.23227055]
reward: 8.609482035040855
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.24915037 0.02305668 0.4232054 ]
rewar

ACTIOIN: [ 0.20434895  0.06639981 -0.92780155]
reward: 6.934249311685562
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.06749768 -0.17042129 -0.40249628]
reward: 7.359584756195545
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7837817 -0.9636652 -0.8993504]
reward: 6.920766115188599
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8389326  -0.05423961  0.8078975 ]
reward: 6.298930309712887
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.49485317 -0.6086427   0.8636834 ]
reward: 7.022527068853378
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.63945526  0.6714204   0.00573465]
reward: 9.305140999611467
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.49520028 -0.8614966   0.5403872 ]
reward: 7.093316435813904
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.64197457 -0.3484052   0.2737069 ]
reward: 6.735913306474686
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7690222 -0.9467974  0.98339  ]
reward: 6.838834881782532
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.36993355  0.4976559  -0.711489

ACTIOIN: [-0.6949278  -0.6665243   0.90335643]
reward: 7.125047087669373
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.53849787 0.5734057  0.9601164 ]
reward: 7.074791431427002
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.06087897  0.5825184  -0.5967851 ]
reward: 8.046612236648798
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.17407283  0.35699576 -0.5427639 ]
reward: 7.988304704427719
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8391515   0.43650118 -0.8479724 ]
reward: 8.427680283784866
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5687278  -0.36112627  0.48849502]
reward: 6.581650912761688
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6008716  -0.69321823 -0.36217102]
reward: 7.545482367277145
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9796135  -0.64857936  0.07365397]
reward: 8.257380150258541
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.74946415 -0.16201785 -0.17839506]
reward: 6.910122931003571
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6001186   0.19726047  0.682

Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.901081   0.2998715 -0.7816394]
reward: 8.419313132762909
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6068312  0.511162  -0.9505541]
reward: 8.167439103126526
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.86672145 -0.25691542 -0.6621698 ]
reward: 6.2141933143138885
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.46371257 -0.7724786  -0.9460338 ]
reward: 5.817775070667267
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.74542165  0.4793738  -0.070094  ]
reward: 9.154701456427574
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.90299875  0.01937815  0.4767305 ]
reward: 8.445646403357387
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4102266  -0.9462098  -0.47653317]
reward: 6.987483650445938
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6653271  -0.68310696 -0.00312597]
reward: 6.648439996642992
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.04283664 -0.13720047  0.9963006 ]
reward: 6.823662307113409
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN:

ACTIOIN: [0.9745293  0.36445048 0.0860176 ]
reward: 7.303903557360172
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.9622157 0.4719291 0.5643816]
reward: 6.945331782102585
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.43595046  0.3193332   0.9621259 ]
reward: 7.793157756328583
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.49315336 -0.38460827  0.64223343]
reward: 6.48000493645668
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.766744    0.41473502  0.50837916]
reward: 8.673099875450134
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.3110428  -0.7806558   0.99251866]
reward: 6.537868320941925
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.06540457 -0.50510246  0.9668795 ]
reward: 6.593422628939152
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.54124504 -0.48593792  0.59136593]
reward: 6.381451100111008
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.96133935 -0.8956784  -0.07606329]
reward: 7.989597663283348
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.28920692  0.89408976 -0.59147686]


ACTIOIN: [ 0.4811209  -0.24749754  0.87162995]
reward: 6.399751588702202
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8694229  -0.6690744  -0.98110276]
reward: 5.480399906635284
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.85379565 0.8080864  0.09872496]
reward: 7.855565786361694
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8467968   0.47048753 -0.0827754 ]
reward: 9.234508946537971
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.80406785 0.17533256 0.02051061]
reward: 7.350754102692008
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.2594878 -0.9355753  0.5007873]
reward: 6.823125183582306
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.15239903 -0.9592061   0.68197024]
reward: 6.2064246237277985
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.18825485  0.7747343  -0.3958688 ]
reward: 8.1906106621027
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.1906464   0.90630496 -0.40285113]
reward: 8.694100216031075
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9558478   0.82014847  0.8442695 ]


ACTIOIN: [ 0.9048545  -0.68324786  0.087204  ]
reward: 6.3246936574578285
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.45590556 -0.98038083  0.6441801 ]
reward: 5.9195334911346436
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.64505273 -0.70351106  0.58330745]
reward: 6.068128764629364
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9780963  0.9831782  0.3729567]
reward: 9.588317811489105
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.39345223 0.43371904 0.47562122]
reward: 7.56464558839798
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.93091387  0.27471006  0.6571588 ]
reward: 8.548465132713318
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.32120922 0.9437208  0.9226525 ]
reward: 7.699859112501144
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7375364  0.1692814 -0.8174991]
reward: 8.089318662881851
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7762728  -0.45156497  0.32877806]
reward: 6.443384200334549
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.74955904 0.13550685 0.9757939 ]
rewa

ACTIOIN: [-0.45607236  0.23117116 -0.46947247]
reward: 8.217771053314209
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.77873266 -0.25866732  0.5117532 ]
reward: 8.00831213593483
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.70813423  0.8481548   0.45807183]
reward: 9.098217189311981
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8704555   0.3903107   0.04237125]
reward: 9.218394961208105
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.13322362 -0.8801862   0.9244908 ]
reward: 6.328546613454819
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5984219  -0.16333507 -0.9416492 ]
reward: 6.296593859791756
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.12475929 -0.38897496  0.79568005]
reward: 6.6905857026577
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6190147  -0.970084   -0.47497088]
reward: 7.173959791660309
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6701622   0.45860368 -0.73507005]
reward: 8.393695831298828
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.06659921  0.5743217  -0.310

ACTIOIN: [ 0.83239293 -0.9272779  -0.06531182]
reward: 6.175017327070236
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.34620237 0.35689846 0.8797516 ]
reward: 7.130944460630417
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.51767284  0.5872285   0.00225334]
reward: 9.10264796949923
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.36837047 -0.70673543 -0.5385659 ]
reward: 6.386328220367432
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9635535   0.13411467  0.01061368]
reward: 9.087054478004575
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.7598737  0.9825613  0.84947443]
reward: 7.373213171958923
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6251312   0.49148482  0.84796417]
reward: 8.268651843070984
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.33001107  0.6662507  -0.5782955 ]
reward: 7.757944107055664
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.85833514 -0.41574922 -0.33553806]
reward: 6.390377581119537
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.97488874 -0.07464095 -0.0411362

ACTIOIN: [ 0.5224512  0.8733553 -0.865258 ]
reward: 7.48564612865448
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.81244457 0.2924694  0.8368971 ]
reward: 6.643127769231796
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.32037586 0.18884967 0.26603895]
reward: 7.602434858679771
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.20618069 0.32331634 0.72515875]
reward: 7.391976892948151
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.362841    0.27141795 -0.14894582]
reward: 7.759631112217903
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.92168987  0.3036509  -0.75972575]
reward: 8.465615004301071
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6118228 -0.5386067  0.7138036]
reward: 7.359412491321564
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9573814  0.6215353  0.7447277]
reward: 8.834189057350159
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7309626  -0.9950647  -0.27410564]
reward: 7.4617922604084015
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6330397  -0.03076105  0.9655904 ]
reward: 

ACTIOIN: [ 0.1681845  -0.05935893  0.3042598 ]
reward: 7.468196760863066
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.311614   -0.85511684  0.53798777]
reward: 6.295281380414963
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.13667294 0.96306807 0.8748295 ]
reward: 7.951565653085709
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7246482 -0.1314548 -0.7032903]
reward: 7.889903098344803
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.61011034 -0.61047     0.55305916]
reward: 6.226360499858856
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.83411014 -0.8346435   0.45245996]
reward: 7.547006696462631
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5251265 -0.2623842  0.349364 ]
reward: 7.913378298282623
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-3.4955103e-04 -8.6567497e-01 -4.5042723e-01]
reward: 6.684247344324831
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-5.1615041e-01 -4.8858795e-04  8.0656213e-01]
reward: 7.709099701314699
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6615294  -0.087

ACTIOIN: [-0.5062915  -0.91127956 -0.43799925]
reward: 7.157012701034546
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.86059654  0.887424   -0.56155485]
reward: 9.186465680599213
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.63868576 0.1510169  0.5730782 ]
reward: 6.939252927899361
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.5983399  0.32706848 0.6498389 ]
reward: 7.078889638185501
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6384838   0.12634692 -0.97426856]
reward: 6.513594537973404
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.44129395  0.14364909  0.19302873]
reward: 8.391914308071136
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.8124174  0.9381606  0.30068955]
reward: 7.825053662061691
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.97154427  0.7554874  -0.8750319 ]
reward: 6.908911228179932
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.7992862 0.4133331 0.8999413]
reward: 6.714105576276779
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6999692   0.26269618 -0.98005277]
rewar

ACTIOIN: [-0.6339477  -0.939592    0.70497257]
reward: 6.989383101463318
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9248166  -0.9264495  -0.11973856]
reward: 7.878628574311733
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.19776092  0.48367402  0.27125388]
reward: 8.410181060433388
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.3364294  -0.06469663 -0.2814023 ]
reward: 7.317471690475941
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.32236856  0.21776927 -0.7582053 ]
reward: 7.781932532787323
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9432404  -0.98139286  0.4900298 ]
reward: 7.4718177318573
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6728549   0.05388107  0.6440718 ]
reward: 8.082664154469967
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7279639  -0.37818703  0.5489415 ]
reward: 6.3449075520038605
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.37492344 -0.47832835  0.81103444]
reward: 7.085560649633408
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.18313843  0.8490337  -0.9

reward: 8.497749745845795
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.76558656 -0.9337359  -0.69479716]
reward: 5.605880379676819
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.865702    0.90544295  0.7608835 ]
reward: 9.010261416435242
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.14030471  0.06083473  0.6768097 ]
reward: 7.524329714477062
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.3534499   0.73705155 -0.7298861 ]
reward: 8.360615342855453
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.82477874 -0.20606738 -0.42766094]
reward: 6.541492938995361
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.80315685  0.4440808  -0.72755426]
reward: 6.913369685411453
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.06202256  0.79826975 -0.4645622 ]
reward: 8.271684981882572
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.86879086 0.39286196 0.02999077]
reward: 7.494080325588584
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.83893555 -0.9051068   0.7768304 ]
reward: 7.1569983959198
Observation: [0 

ACTIOIN: [-0.7986425  -0.83217    -0.04844559]
reward: 7.91802691668272
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.85805476 -0.9160814  -0.644718  ]
reward: 5.581145823001862
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.1190923   0.5935441  -0.45329106]
reward: 8.25934536755085
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.98948556  0.856137    0.6274325 ]
reward: 9.218190014362335
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.87451696 -0.3214926   0.464624  ]
reward: 6.339366436004639
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9625196  -0.15673289  0.14763689]
reward: 6.733110636472702
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.01264278  0.1934408  -0.817442  ]
reward: 7.3886415753513575
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8784629   0.83096594 -0.9913901 ]
reward: 6.961112916469574
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7022238  -0.8855555   0.43168384]
reward: 5.980536878108978
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.35788783 -0.7971185   0.6

ACTIOIN: [-0.94716865 -0.77413154  0.91554147]
reward: 7.257495641708374
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.801252   -0.08375362  0.33012497]
reward: 6.784869395196438
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.12789895  0.0434704  -0.5593898 ]
reward: 7.356181621551514
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.6982129  0.5139616  0.77741534]
reward: 7.038333356380463
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.79023635 0.51529604 0.6907046 ]
reward: 7.034355103969574
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9053634 -0.5695131 -0.9393089]
reward: 5.585814654827118
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.13409883 -0.7370388  -0.67110646]
reward: 6.725953578948975
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.41064346 -0.811414    0.39812642]
reward: 6.379816114902496
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.3383796  -0.5244756  -0.48537165]
reward: 7.328532367944717
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.09632526  0.74783254 -0.6313176 ]

ACTIOIN: [ 0.74908745 -0.40046203  0.16741659]
reward: 6.683033928275108
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.98211724  0.84569305 -0.91982484]
reward: 8.90798544883728
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8363405   0.09581707 -0.64111525]
reward: 8.291042312979698
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.59712994  0.20391041 -0.38506898]
reward: 7.221711486577988
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8544329  -0.9597289  -0.67952335]
reward: 7.2151806354522705
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.78442454 0.68490463 0.365435  ]
reward: 7.535045087337494
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9880748   0.4744031  -0.71220565]
reward: 6.7741226851940155
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.74622744  0.9782377  -0.55905   ]
reward: 9.165415108203888
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7099761  -0.811444   -0.43576616]
reward: 6.042813777923584
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.91640145 -0.50044537  0.10

ACTIOIN: [-0.6367792  -0.19584438  0.4777248 ]
reward: 7.963210016489029
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.10673956 -0.44074747 -0.80462074]
reward: 6.861371345818043
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5272685  -0.48363608 -0.7547627 ]
reward: 7.288869738578796
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.11992613  0.8747182  -0.7546431 ]
reward: 8.240001238882542
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.57206964 -0.5779666   0.69515723]
reward: 6.154806554317474
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.11050642 -0.5786564   0.30022237]
reward: 7.010614834725857
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.47579485 -0.81227505  0.80282986]
reward: 5.909100234508514
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4835107  -0.88162565  0.00664752]
reward: 7.595237526576966
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.516997   -0.8236086   0.64294255]
reward: 7.050445854663849
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8557913  -0.2672221  -0.

obs: [0 1 2]
ACTIOIN: [0.42531312 0.0561378  0.07969475]
reward: 7.551129937171936
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.5692887  0.9828292  0.60287565]
reward: 7.810664892196655
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6705821  -0.6361075  -0.26165074]
reward: 6.431659638881683
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.12633795 -0.81155664  0.4390917 ]
reward: 6.6230137050151825
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8488968  -0.25508597  0.6231181 ]
reward: 6.272899121046066
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.735858   -0.15125595 -0.89913875]
reward: 6.213747277855873
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6909219  -0.45082557 -0.0530991 ]
reward: 6.805153425782919
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.55901283  0.6854784   0.79224867]
reward: 8.4522425532341
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5275482  -0.42606482  0.9900885 ]
reward: 6.056298464536667
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.10506857 -0.068913

ACTIOIN: [-0.2551408  -0.14676017 -0.76938635]
reward: 7.3389942944049835
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5703221  -0.38916382  0.21266086]
reward: 6.827853217720985
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.69520307  0.18141516  0.49441415]
reward: 8.382204070687294
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7083531   0.20140988 -0.10306794]
reward: 8.8066950365901
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.77859175 0.8550594  0.50825214]
reward: 7.568215489387512
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8326525  -0.36051625 -0.86470556]
reward: 7.607430696487427
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9563337  -0.4610596  -0.54936486]
reward: 6.033241838216782
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.3498863   0.38242653 -0.7248785 ]
reward: 8.007434338331223
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.25625667  0.26280752 -0.58243454]
reward: 7.424116313457489
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.17271239 0.73443025 0.1544068

ACTIOIN: [ 0.5373333   0.00780483 -0.67139345]
reward: 6.799078064970672
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.587237   -0.2687341  -0.85221046]
reward: 7.466292440891266
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6596843  -0.20290038 -0.35784993]
reward: 6.779565393924713
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.20610791  0.05922882  0.39925608]
reward: 7.866080652922392
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.58687156  0.68062323  0.0154771 ]
reward: 9.252017701976001
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8045606  -0.26996306  0.9618191 ]
reward: 7.572778433561325
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5923672   0.55735135  0.10955747]
reward: 9.040161050856113
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.6344311  0.29998347 0.18544038]
reward: 7.480111971497536
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7122495  -0.5831086  -0.04279052]
reward: 6.661851353943348
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.01324266 0.5247027  0.897673

ACTIOIN: [-0.6401269   0.97767127  0.6904152 ]
reward: 8.927382946014404
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.90464973  0.9552142  -0.5130974 ]
reward: 9.346766531467438
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.82788557 0.91639674 0.10407883]
reward: 7.984432339668274
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.824054   -0.9244503  -0.10256745]
reward: 7.797036275267601
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.48042998 -0.38539353  0.3072238 ]
reward: 6.826952695846558
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7485099   0.24647057 -0.93101263]
reward: 6.566948056221008
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8990169   0.95171493 -0.66743743]
reward: 9.183294415473938
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.31596825 -0.1814827   0.8943328 ]
reward: 7.240152716636658
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.242427    0.2592159   0.82558775]
reward: 7.676055148243904
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8662954   0.83408743  0.748

Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7216368 -0.5635948  0.8732693]
reward: 7.284772634506226
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4160527  -0.9180694  -0.15433551]
reward: 7.343647763133049
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5375388  -0.87043566  0.55769897]
reward: 6.034326553344727
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.803932   -0.15530415  0.5801442 ]
reward: 8.068483635783195
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.15129967 -0.54064095 -0.7850327 ]
reward: 6.523026689887047
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4825127 -0.64295   -0.6737599]
reward: 7.165802836418152
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5831019   0.674704    0.35469198]
reward: 8.903113961219788
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.9326851  0.98919606 0.7005293 ]
reward: 7.355981707572937
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.23923111 -0.9793348   0.06478883]
reward: 7.195107512176037
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.

ACTIOIN: [-0.07097776  0.9508423   0.7345983 ]
reward: 8.287221804261208
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.22279795 -0.51073074  0.41984785]
reward: 7.2922193557024
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.938594   -0.9460728  -0.12025969]
reward: 5.99507350474596
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.25342876 -0.28263286 -0.61491656]
reward: 7.355879336595535
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.21725625  0.3089763   0.21902493]
reward: 8.307207614183426
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7266556  -0.9221171   0.08817308]
reward: 7.716365411877632
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.03862431 -0.82220024  0.49833995]
reward: 6.718084119260311
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9248634  0.8797631 -0.4206295]
reward: 9.383997023105621
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.638331   -0.03121422  0.57645816]
reward: 8.0306586176157
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.18222754  0.73107827  0.832555  

ACTIOIN: [-0.6335311  -0.64567053 -0.90877223]
reward: 7.07908833026886
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7950995  -0.09369648  0.3721098 ]
reward: 8.329293213784695
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.85662    -0.3795984   0.20651284]
reward: 6.557268738746643
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.65778846 -0.6538749   0.6047115 ]
reward: 7.399202108383179
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.38926885  0.84755087 -0.8427338 ]
reward: 8.39408591389656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.0082641  -0.86547554 -0.3785484 ]
reward: 6.764240154065192
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.45020264 -0.71753424  0.03493169]
reward: 6.797331426292658
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6535878 -0.9204695  0.0545074]
reward: 6.371435258537531
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.10124081  0.5261705   0.7501058 ]
reward: 7.877305507659912
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.91650605 -0.72360283  0.84785

ACTIOIN: [ 0.19792083 -0.6844694  -0.7613568 ]
reward: 6.356252998113632
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8470502   0.83458084 -0.8802966 ]
reward: 8.80133444070816
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.9366771  0.49866405 0.70688325]
reward: 6.855103701353073
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.42970234 -0.21103297 -0.18665546]
reward: 7.1726092249155045
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.23958133 -0.19095722 -0.12634835]
reward: 7.9222757667303085
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.19708063 0.64630485 0.33099064]
reward: 8.11823357641697
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7242513  -0.29824606  0.90113866]
reward: 7.524866551160812
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9052178   0.25277543 -0.44047183]
reward: 8.71752142906189
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.45551106  0.7728309   0.37052736]
reward: 8.857814610004425
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7270923   0.0732047   0.7644520

ACTIOIN: [ 0.95212036 -0.14587873  0.25048077]
reward: 6.651520133018494
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.01395168 -0.7932671  -0.8926986 ]
reward: 6.32798601873219
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.33300474  0.90441513 -0.08574322]
reward: 8.485667169094086
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.3133057  -0.36591077 -0.95525175]
reward: 6.365531772375107
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8483955   0.52943265 -0.9977409 ]
reward: 8.38008725643158
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.82228625  0.65644246 -0.7875594 ]
reward: 8.691169321537018
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5560052  -0.22247235  0.7315356 ]
reward: 7.601997211575508
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.89139044 -0.69578576  0.7526886 ]
reward: 7.442916095256805
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.33736312  0.46394804 -0.8127603 ]
reward: 7.313824623823166
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9579063 -0.3903619  0.3372

ACTIOIN: [-0.05160055  0.85699147 -0.00815699]
reward: 8.900435028597713
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.3927965  -0.31397972  0.22345436]
reward: 7.855362415313721
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.68025017 -0.08317885  0.08860312]
reward: 7.1479678601026535
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.04809507  0.97499233 -0.9178236 ]
reward: 8.009073656052351
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.96793866 -0.00353314 -0.821905  ]
reward: 6.206623178673908
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.41485852 -0.56109226 -0.23215671]
reward: 6.791892513632774
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.31283233  0.03844503  0.9256103 ]
reward: 7.425667051225901
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.90843916  0.480106   -0.33228135]
reward: 7.239385485649109
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9562198   0.35502222 -0.8143178 ]
reward: 6.5844846069812775
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8207005   0.6412093  -

ACTIOIN: [-0.5517704  -0.6216865  -0.65243196]
reward: 7.277651906013489
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.92765844 -0.6098478  -0.70613235]
reward: 5.756361424922943
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6710448  -0.9369796  -0.98155797]
reward: 6.752507269382477
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7642924   0.64127105 -0.43957177]
reward: 7.4374068677425385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.4558967  -0.6734829   0.04402965]
reward: 6.82659075409174
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8882     -0.35550916  0.68735236]
reward: 6.068938493728638
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.43427128 -0.21897492  0.32693028]
reward: 7.88836607336998
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8409915   0.5490868  -0.46850353]
reward: 7.2395917773246765
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.08625346  0.9846623  -0.7040155 ]
reward: 8.194393336772919
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9842303  -0.98025566 -0.

Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.75135    -0.5234629   0.40978807]
reward: 6.3153990507125854
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6720366   0.8560489  -0.14805262]
reward: 9.38003285229206
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.55635184 0.7613857  0.1739244 ]
reward: 8.031109437346458
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.12679969 -0.3882505   0.6026097 ]
reward: 7.135939493775368
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6857913 -0.7494458 -0.9293625]
reward: 5.635400414466858
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.63580537 -0.8050334  -0.10742661]
reward: 6.451734632253647
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6259459  -0.82897323 -0.27951878]
reward: 7.517453908920288
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5427803   0.7863692   0.57301986]
reward: 8.756129622459412
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.29300228 -0.9393053   0.34380338]
reward: 7.009893596172333
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: 

ACTIOIN: [-0.5707956   0.7447492   0.69830304]
reward: 8.617241740226746
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6777457  -0.70202196  0.8013681 ]
reward: 7.174355626106262
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.41276518 -0.00233432  0.7124091 ]
reward: 6.872491421876475
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.80208224 -0.27076623 -0.91424227]
reward: 7.617073744535446
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.3616871  -0.30316722 -0.4920868 ]
reward: 6.843058884143829
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7200163 -0.5332281 -0.7281286]
reward: 7.458659589290619
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6013649   0.8741026   0.16626123]
reward: 9.309206277132034
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.9696829  -0.18812022 -0.75831294]
reward: 6.083883970975876
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.08854187 -0.22806562  0.35952726]
reward: 7.323865249752998
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.0117405   0.39748618 -0.584

Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.6974357  0.82735   -0.7453475]
reward: 7.384566843509674
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.785231   -0.14410777  0.8755909 ]
reward: 7.765532299876213
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5823462   0.21300325 -0.5467509 ]
reward: 7.083906143903732
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.39094704 -0.8882764   0.43628114]
reward: 6.284495413303375
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.7172187  -0.478465    0.82350326]
reward: 5.980813056230545
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.05786618  0.365724    0.5153164 ]
reward: 7.9082737527787685
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.43216676 -0.30274522  0.25019643]
reward: 7.879225105047226
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.73621565 -0.86333597 -0.52987266]
reward: 7.343007028102875
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.43053138 -0.22387701  0.18670103]
reward: 7.158890575170517
Observation: [0 1 2]
obs: [0 1 2]
ACTIO

ACTIOIN: [-0.7991878  0.4056627 -0.5362201]
reward: 8.66863039135933
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.01803172  0.33455214  0.9913929 ]
reward: 7.36119095236063
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.13762335 -0.17235224  0.36114183]
reward: 7.328882575035095
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.82561356 0.18226065 0.71867776]
reward: 6.637969329953194
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9674518   0.99783444  0.00431827]
reward: 9.960967984981835
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.95168406 0.82076365 0.36292967]
reward: 7.506149917840958
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.3588642  0.3261683 -0.8573445]
reward: 7.109959602355957
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.14951186 -0.8265758  -0.90107805]
reward: 6.421857997775078
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6576876   0.7277487   0.81237566]
reward: 8.573060631752014
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.73658574 -0.08482952  0.86794657]
rewa

ACTIOIN: [-0.8570761   0.060115    0.73128736]
reward: 8.185903750360012
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.40458584 -0.02367226  0.8840447 ]
reward: 7.496868871152401
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.8894554  -0.4731508  -0.17880172]
reward: 6.458592116832733
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8216855  -0.95700055  0.8059418 ]
reward: 7.058743119239807
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.68193555 -0.30710608 -0.5913091 ]
reward: 6.419649302959442
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.86418784 -0.37357882  0.9068459 ]
reward: 7.583763092756271
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4680562   0.41318926 -0.51958466]
reward: 8.361660808324814
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.3556135  -0.7521627   0.61043733]
reward: 6.281786471605301
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.27353105  0.7720578   0.4175836 ]
reward: 8.62800520658493
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.553815   0.51234955 0.3248

ACTIOIN: [-0.3329436  0.8175953 -0.9169258]
reward: 8.233613103628159
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.426387    0.19177538 -0.98128134]
reward: 6.784107029438019
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.10208629  0.6132747   0.23214255]
reward: 8.483218431472778
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.28235736 -0.72188765 -0.6475481 ]
reward: 6.912921637296677
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.37001175 -0.71414876 -0.95608366]
reward: 6.6997793316841125
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5455471   0.9291547   0.10695346]
reward: 9.367748364806175
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6964751   0.36924762  0.60793155]
reward: 8.457791149616241
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.933578   -0.64590096  0.22089712]
reward: 6.1996238976716995
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7902341   0.3072093  -0.31525284]
reward: 8.782190561294556
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7438208  -0.9768619   0.9

ACTIOIN: [-0.36928684  0.91569537  0.00213906]
reward: 9.282843147171661
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.9702908  0.08075866 0.88639414]
reward: 6.224073737859726
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9716362  -0.6593482   0.80928016]
reward: 7.5030078291893005
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.04648834  0.14884122 -0.8920698 ]
reward: 7.3032597452402115
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.67696714 -0.40751782  0.6782796 ]
reward: 7.591169744729996
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.59879375 -0.31373838 -0.84428835]
reward: 7.440767019987106
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.03550496  0.75061846  0.4372485 ]
reward: 8.348874922841787
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.6662     0.04075912 0.86316633]
reward: 6.511392809450626
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8021686  -0.45787597  0.5469742 ]
reward: 7.797318458557129
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.01894997 0.822388   0.5961007

ACTIOIN: [-0.85970086  0.9351783  -0.06832793]
reward: 9.726551212370396
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6193831  -0.23795567 -0.7722982 ]
reward: 7.6091292053461075
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.5111966  -0.44870928  0.25476864]
reward: 7.8077186942100525
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.64398396 -0.73955584 -0.12374097]
reward: 7.780687153339386
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8272894  -0.12311023  0.18413971]
reward: 8.520039461553097
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.26140088 -0.70659244  0.7624953 ]
reward: 6.26951140165329
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.39211807  0.06540696 -0.0172805 ]
reward: 7.656008394435048
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.82869864 -0.02019637  0.64070034]
reward: 6.510404657572508
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.55142504 -0.3005549   0.18920083]
reward: 8.061669304966927
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.89312553 -0.6585012   0

ACTIOIN: [ 0.01128838 -0.9541681   0.0657183 ]
reward: 6.96882523689419
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5873633   0.77078056 -0.6833949 ]
reward: 7.500022351741791
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.7528493   0.65774596 -0.9403759 ]
reward: 8.470219314098358
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.9660884  -0.9564804   0.07485767]
reward: 7.934750363230705
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.01649104 -0.25649068 -0.6939383 ]
reward: 7.0330799631774426
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.5532804  -0.08298286  0.03634116]
reward: 7.32739556953311
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [0.15780008 0.86168665 0.23121901]
reward: 8.472667559981346
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.68745196  0.27322894 -0.86582994]
reward: 6.719947040081024
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.6324528  -0.7967088  -0.90808946]
reward: 6.927654504776001
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [ 0.74270314 -0.22390462 -0.8873

  Variable._execution_engine.run_backward(


Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACT

ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.00231678]
reward: 9.748246488161385
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8762025   0.87436074  0.

ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.00227273]
reward: 9.745675793616101
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8748993   0.8730492   0.

ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.

ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.00226142]
reward: 9.744846443878487
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8744808   0.8726271   0.

ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.00215078]
reward: 9.736360360402614
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87020147  0.8683097   0.

reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.87343085  0.87157357  0.00221832]
reward: 9.742786091286689
Observation

ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]
reward: 9.73491798620671
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8694743   0.86757606  0.00213237]

ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.

ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.00208123]
reward: 8.978175207274035
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.4923646   0.48789185 -0.

ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.00244072]
reward: 9.759090868523344
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8816572   0.8798744   0.

ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.00239298]
reward: 9.755790499504656
Observation: [0 1 2]
obs: [0 1 2]
ACTIOIN: [-0.8799905   0.87819296  0.

AttributeError: 'task' object has no attribute '_goal'