In [15]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
import numpy as np
import tensorboard
import optuna

# Install tqdm if not already installed
from tqdm.autonotebook import tqdm as notebook_tqdm

In [16]:
def make_env(reset_noise_scale, forward_reward_weight, ctrl_cost_weight, healthy_reward, contact_cost_weight, healthy_z_range, contact_force_range):
    """
    Crea e restituisce l'ambiente Ant-v5 dalla libreria Gymnasium con i parametri specificati.
    """
    # Ant-v5 è l’ambiente più recente in Gymnasium.
    return gym.make("Ant-v5", 
                    reset_noise_scale=reset_noise_scale, 
                    forward_reward_weight=forward_reward_weight, 
                    ctrl_cost_weight=ctrl_cost_weight, 
                    healthy_reward=healthy_reward, 
                    contact_cost_weight = contact_cost_weight,
                    healthy_z_range=healthy_z_range,
                    contact_force_range=contact_force_range)
                   # render_mode='none')

In [17]:
    # reset_noise_scale = trial.suggest_float('reset_noise_scale', 0.05, 0.2)           # Default circa 0.1; esploriamo da 0.05 a 0.2
    # forward_reward_weight = trial.suggest_float('forward_reward_weight', 0.5, 1.5)     # Default tipico è 1; esploriamo da 0.5 a 1.5
    # ctrl_cost_weight = trial.suggest_float('ctrl_cost_weight', 0.1, 1.0)               # Default tipico 0.5; esploriamo da 0.1 a 1.0
    # healthy_reward = trial.suggest_float('healthy_reward', 0.5, 1.5)                   # Default tipico 1; esploriamo da 0.5 a 1.5
    
    # # Parametri aggiuntivi per Ant-v5
    # contact_cost_weight = trial.suggest_float('contact_cost_weight', 1e-4, 1e-3)  # Es. range intorno a 5e-4 come default
    # healthy_z_lower = trial.suggest_float('healthy_z_lower', 0.1, 0.3)             # Per definire l'intervallo di altezze "sane"
    # healthy_z_upper = trial.suggest_float('healthy_z_upper', 0.8, 1.2)
    # contact_force_min = trial.suggest_float('contact_force_min', -1.0, -0.5)         # Modificabile se usi forze di contatto
    # contact_force_max = trial.suggest_float('contact_force_max', 0.5, 1.0)


    # learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
    # n_steps = trial.suggest_int('n_steps', 2048, 8192, step=2048)
    # batch_size = trial.suggest_categorical('batch_size', [64, 128, 256])  
    # # Per ambienti complessi come Ant, molti esperimenti usano gamma intorno a 0.99-0.995
    # gamma = trial.suggest_float('gamma', 0.99, 0.999)
    # gae_lambda = trial.suggest_float('gae_lambda', 0.8, 1.0)
    # clip_range = trial.suggest_float('clip_range', 0.1, 0.3) 
    # ent_coef = trial.suggest_float('ent_coef', 0.0, 0.1)

In [18]:
# Hyperparameter tuning con Optuna

def objective(trial):
    # Parametri dell'environment
    reset_noise_scale = trial.suggest_float('reset_noise_scale', 0.05, 0.2)           # Default circa 0.1; esploriamo da 0.05 a 0.2
    forward_reward_weight = trial.suggest_float('forward_reward_weight', 0.5, 1.5)     # Default tipico è 1; esploriamo da 0.5 a 1.5
    ctrl_cost_weight = trial.suggest_float('ctrl_cost_weight', 0.1, 1.0)               # Default tipico 0.5; esploriamo da 0.1 a 1.0
    healthy_reward = trial.suggest_float('healthy_reward', 0.5, 1.5)                   # Default tipico 1; esploriamo da 0.5 a 1.5
    
    # Parametri aggiuntivi per Ant-v5
    contact_cost_weight = trial.suggest_float('contact_cost_weight', 1e-4, 1e-3)  # Es. range intorno a 5e-4 come default
    healthy_z_lower = trial.suggest_float('healthy_z_lower', 0.1, 0.3)             # Per definire l'intervallo di altezze "sane"
    healthy_z_upper = trial.suggest_float('healthy_z_upper', 0.8, 1.2)
    contact_force_min = trial.suggest_float('contact_force_min', -1.0, -0.5)         # Modificabile se usi forze di contatto
    contact_force_max = trial.suggest_float('contact_force_max', 0.5, 1.0)

    # Crea l'ambiente passando tutti i parametri
    env = make_env(
        reset_noise_scale,
        forward_reward_weight,
        ctrl_cost_weight,
        healthy_reward,
        contact_cost_weight=contact_cost_weight,
        healthy_z_range=(healthy_z_lower, healthy_z_upper),
        contact_force_range=(contact_force_min, contact_force_max)
    )
    env = DummyVecEnv([lambda: env])
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # Iperparametri per il modello PPO
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
    n_steps = trial.suggest_int('n_steps', 2048, 8192, step=2048)
    batch_size = trial.suggest_categorical('batch_size', [512, 1024, 2048, 4096])  
    # Per ambienti complessi come Ant, molti esperimenti usano gamma intorno a 0.99-0.995
    gamma = trial.suggest_float('gamma', 0.965, 0.98)
    gae_lambda = trial.suggest_float('gae_lambda', 0.9, 1.0)
    clip_range = trial.suggest_float('clip_range', 0.3, 0.5) 
    ent_coef = trial.suggest_float('ent_coef', 0.0, 0.1)
    
    # Nuovo iperparametro per la penalizzazione della varianza
    std_penalty_weight = trial.suggest_float('std_penalty_weight', 0.0, 0.5)

    # Crea ed allena il modello PPO
    model = PPO("MlpPolicy", env,
                learning_rate=learning_rate,
                n_steps=n_steps,
                batch_size=batch_size,
                gamma=gamma,
                gae_lambda=gae_lambda,
                clip_range=clip_range,
                ent_coef=ent_coef,
                seed=42,
                verbose=0)
    model.learn(total_timesteps=200000)

    # Valuta il modello su 200 episodi (200 è ottimale)
    episodes = 150

    episode_rewards = []
    for episode in range(episodes):
        obs = env.reset()
        done = False
        episode_reward = 0
        while not done:
            action, _states = model.predict(obs)
            obs, reward, done, info = env.step(action)
            episode_reward += reward
        episode_rewards.append(episode_reward)

    # Calcola reward media e varianza
    mean_reward = np.mean(episode_rewards)
    reward_std = np.std(episode_rewards)

    # Definisce l'obiettivo: massimizzare la reward media penalizzando la varianza
    score = mean_reward - std_penalty_weight * reward_std

    print(f'Mean is: {mean_reward}, Std is: {reward_std}\n')

    return score

# Crea uno studio Optuna e ottimizza l'obiettivo
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=300)

# Stampa i migliori iperparametri trovati
print("Best hyperparameters: ", study.best_params)

[I 2025-02-12 17:51:23,514] A new study created in memory with name: no-name-1299e639-7b0b-4f75-861b-91da70fccdd3
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-3)
[I 2025-02-12 17:52:31,664] Trial 0 finished with value: -2.2595818042755127 and parameters: {'reset_noise_scale': 0.10550150920629535, 'forward_reward_weight': 1.1310290950229411, 'ctrl_cost_weight': 0.656175542456229, 'healthy_reward': 1.402054053468586, 'contact_cost_weight': 0.00011460398761631547, 'healthy_z_lower': 0.27796109976617994, 'healthy_z_upper': 1.102818142370627, 'contact_force_min': -0.9224021861038963, 'contact_force_max': 0.8865754974915587, 'learning_rate': 5.332593661181692e-05, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9781508604425411, 'gae_lambda': 0.9620024837203535, 'clip_range': 0.43784220445501637, 'ent_coef': 0.01650888907499446, 'std_penalty_weight': 0.11508521672790645}. Best is trial 0 with value: -2.2595818042755127.


Mean is: -2.0479979515075684, Std is: 1.8384965658187866



[I 2025-02-12 17:53:52,547] Trial 1 finished with value: 6.8863396644592285 and parameters: {'reset_noise_scale': 0.061516859181143226, 'forward_reward_weight': 0.9468327954193553, 'ctrl_cost_weight': 0.2170985768679085, 'healthy_reward': 1.2922614989484824, 'contact_cost_weight': 0.0005849111914860173, 'healthy_z_lower': 0.2044310032992321, 'healthy_z_upper': 1.137775407579622, 'contact_force_min': -0.845824754293711, 'contact_force_max': 0.7440804407728538, 'learning_rate': 9.278620006867075e-05, 'n_steps': 8192, 'batch_size': 4096, 'gamma': 0.9718478602122956, 'gae_lambda': 0.9010149714316421, 'clip_range': 0.3036494041215037, 'ent_coef': 0.06470603172295075, 'std_penalty_weight': 0.011711804278705829}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 7.000239372253418, Std is: 9.725189208984375



[I 2025-02-12 17:55:03,039] Trial 2 finished with value: -5.570282936096191 and parameters: {'reset_noise_scale': 0.10351098208385554, 'forward_reward_weight': 1.0461443882426187, 'ctrl_cost_weight': 0.3290557766230068, 'healthy_reward': 0.6409966912089246, 'contact_cost_weight': 0.0007437459013269457, 'healthy_z_lower': 0.16176842222262425, 'healthy_z_upper': 1.1319821491367066, 'contact_force_min': -0.5680651007429471, 'contact_force_max': 0.7916323856222833, 'learning_rate': 0.000411221496590389, 'n_steps': 4096, 'batch_size': 1024, 'gamma': 0.9707153321548935, 'gae_lambda': 0.9163930904551032, 'clip_range': 0.47904058916669406, 'ent_coef': 0.07546370422744655, 'std_penalty_weight': 0.4224321898813669}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -2.7178549766540527, Std is: 6.752392292022705



We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=2048 and n_envs=1)
[I 2025-02-12 17:56:14,588] Trial 3 finished with value: 4.6528000831604 and parameters: {'reset_noise_scale': 0.15001286627760965, 'forward_reward_weight': 1.3144449018160789, 'ctrl_cost_weight': 0.17342669196106247, 'healthy_reward': 1.3859462459223004, 'contact_cost_weight': 0.00014231927673058302, 'healthy_z_lower': 0.1463748382501555, 'healthy_z_upper': 1.0631288386206452, 'contact_force_min': -0.7764684341302559, 'contact_force_max': 0.8741845986698114, 'learning_rate': 5.036945862786653e-05, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.966358138093537, 'gae_lambda': 0.9812618438072064, 'clip_range': 0.30246865507779946, 'ent_coef': 0.06532962457212643, 'std_penalty_weight': 0.025616397776681454}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 4.877623558044434, Std is: 8.7765474319458



[I 2025-02-12 17:57:20,883] Trial 4 finished with value: -0.763779878616333 and parameters: {'reset_noise_scale': 0.11063459775159278, 'forward_reward_weight': 0.6201516428239755, 'ctrl_cost_weight': 0.5571170457196725, 'healthy_reward': 1.4711649233512412, 'contact_cost_weight': 0.0006623171715416224, 'healthy_z_lower': 0.14991783055618982, 'healthy_z_upper': 0.8358647669941488, 'contact_force_min': -0.8470795595966778, 'contact_force_max': 0.6776951033953942, 'learning_rate': 3.199202647267462e-05, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9723936108149489, 'gae_lambda': 0.9271329274235571, 'clip_range': 0.3242766548933484, 'ent_coef': 0.03716932105700369, 'std_penalty_weight': 0.004240969483603529}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -0.7609449028968811, Std is: 0.6684694886207581



[I 2025-02-12 17:58:25,896] Trial 5 finished with value: -0.7209827899932861 and parameters: {'reset_noise_scale': 0.06210377931045539, 'forward_reward_weight': 1.3671712165601226, 'ctrl_cost_weight': 0.66463119937505, 'healthy_reward': 1.1137342788908606, 'contact_cost_weight': 0.00038195393479656625, 'healthy_z_lower': 0.1331414826241683, 'healthy_z_upper': 0.8489505519418192, 'contact_force_min': -0.5656679123942236, 'contact_force_max': 0.8402773498395728, 'learning_rate': 0.00048250730857802817, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9762395555843779, 'gae_lambda': 0.9976511554377238, 'clip_range': 0.3927524321713018, 'ent_coef': 0.051097616180146466, 'std_penalty_weight': 0.4233597284100665}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -0.5397502183914185, Std is: 0.4280816912651062



[I 2025-02-12 17:59:31,744] Trial 6 finished with value: 1.4075632095336914 and parameters: {'reset_noise_scale': 0.18288712059865425, 'forward_reward_weight': 0.5298807247387377, 'ctrl_cost_weight': 0.3354922905414297, 'healthy_reward': 0.9544430200705705, 'contact_cost_weight': 0.00024117283755003172, 'healthy_z_lower': 0.2683418195396474, 'healthy_z_upper': 0.9467389308290445, 'contact_force_min': -0.9346033804614085, 'contact_force_max': 0.7026323386503308, 'learning_rate': 0.000903000780993366, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9685378792094911, 'gae_lambda': 0.9758215684578931, 'clip_range': 0.3980310689665105, 'ent_coef': 0.005600628195874757, 'std_penalty_weight': 0.1339598116549775}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 1.6537342071533203, Std is: 1.837647795677185



[I 2025-02-12 18:00:38,764] Trial 7 finished with value: 0.9740458726882935 and parameters: {'reset_noise_scale': 0.05415368203335558, 'forward_reward_weight': 1.2653849895678353, 'ctrl_cost_weight': 0.23505971050164254, 'healthy_reward': 1.3003108660414087, 'contact_cost_weight': 0.0009660613882998289, 'healthy_z_lower': 0.2244259730670371, 'healthy_z_upper': 0.8457258446681918, 'contact_force_min': -0.9747741486042978, 'contact_force_max': 0.7084285403920428, 'learning_rate': 4.478767099862318e-05, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9651606997352946, 'gae_lambda': 0.9169834671057494, 'clip_range': 0.32049348461836297, 'ent_coef': 0.029390576597816645, 'std_penalty_weight': 0.11247275065853019}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 1.136149525642395, Std is: 1.441270351409912



[I 2025-02-12 18:01:44,346] Trial 8 finished with value: -2.4189038276672363 and parameters: {'reset_noise_scale': 0.09046557267897193, 'forward_reward_weight': 0.8819555609641846, 'ctrl_cost_weight': 0.55005629968532, 'healthy_reward': 0.9191917520709515, 'contact_cost_weight': 0.0008363490075930277, 'healthy_z_lower': 0.27742824145701933, 'healthy_z_upper': 0.9377610048937389, 'contact_force_min': -0.9660062093390751, 'contact_force_max': 0.8564049285241755, 'learning_rate': 1.3653474858723789e-05, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9741954335827133, 'gae_lambda': 0.9528017177485105, 'clip_range': 0.4643343992031769, 'ent_coef': 0.08771690465078485, 'std_penalty_weight': 0.1642583380478534}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -2.157010555267334, Std is: 1.594398856163025



[I 2025-02-12 18:02:48,559] Trial 9 finished with value: -1.6820695400238037 and parameters: {'reset_noise_scale': 0.10018521276103919, 'forward_reward_weight': 1.4185000869506914, 'ctrl_cost_weight': 0.937049230608272, 'healthy_reward': 0.6454658096877269, 'contact_cost_weight': 0.00011229074090495766, 'healthy_z_lower': 0.14341638247589797, 'healthy_z_upper': 0.8940534904328652, 'contact_force_min': -0.6953920490558474, 'contact_force_max': 0.6536549882836657, 'learning_rate': 1.1688890909773184e-05, 'n_steps': 2048, 'batch_size': 2048, 'gamma': 0.9768587888917897, 'gae_lambda': 0.9574507734624046, 'clip_range': 0.4273683973543567, 'ent_coef': 0.04794105623958764, 'std_penalty_weight': 0.07483013413911949}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -1.431943655014038, Std is: 3.3425815105438232



[I 2025-02-12 18:04:10,866] Trial 10 finished with value: -21.481599807739258 and parameters: {'reset_noise_scale': 0.14180491834925849, 'forward_reward_weight': 0.8178844625347171, 'ctrl_cost_weight': 0.922121142804913, 'healthy_reward': 1.162041110119592, 'contact_cost_weight': 0.00046696550418403725, 'healthy_z_lower': 0.20724026367662285, 'healthy_z_upper': 1.196697428781267, 'contact_force_min': -0.6942253531985401, 'contact_force_max': 0.5862435746015868, 'learning_rate': 0.00016324741851781143, 'n_steps': 8192, 'batch_size': 4096, 'gamma': 0.9704857913117058, 'gae_lambda': 0.9008017932022296, 'clip_range': 0.3563993237491578, 'ent_coef': 0.09991154593708876, 'std_penalty_weight': 0.29170043144291696}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -16.318906784057617, Std is: 17.6986141204834



[I 2025-02-12 18:05:20,504] Trial 11 finished with value: 3.0631351470947266 and parameters: {'reset_noise_scale': 0.148890941240408, 'forward_reward_weight': 1.180329792628364, 'ctrl_cost_weight': 0.11427104677135541, 'healthy_reward': 1.26650464863852, 'contact_cost_weight': 0.0005697388341308392, 'healthy_z_lower': 0.10927166382560403, 'healthy_z_upper': 1.0310549416785237, 'contact_force_min': -0.7976434346201101, 'contact_force_max': 0.9477090810065923, 'learning_rate': 0.00012047053228221793, 'n_steps': 8192, 'batch_size': 4096, 'gamma': 0.9653361537826951, 'gae_lambda': 0.9892078310623474, 'clip_range': 0.30194959809706373, 'ent_coef': 0.0671060346017541, 'std_penalty_weight': 0.005235517843106715}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 3.0969059467315674, Std is: 6.450342655181885



We recommend using a `batch_size` that is a factor of `n_steps * n_envs`.
Info: (n_steps=6144 and n_envs=1)
[I 2025-02-12 18:06:31,823] Trial 12 finished with value: 3.417271852493286 and parameters: {'reset_noise_scale': 0.17790243804928732, 'forward_reward_weight': 0.9114077212247793, 'ctrl_cost_weight': 0.12086757440415208, 'healthy_reward': 1.4898940135926702, 'contact_cost_weight': 0.00034490670839408565, 'healthy_z_lower': 0.1772975712833602, 'healthy_z_upper': 1.054279128058285, 'contact_force_min': -0.8403006565122295, 'contact_force_max': 0.9503634774573958, 'learning_rate': 8.134021131981344e-05, 'n_steps': 6144, 'batch_size': 4096, 'gamma': 0.967768166865959, 'gae_lambda': 0.9335868133767052, 'clip_range': 0.3539160347689638, 'ent_coef': 0.06251612502830417, 'std_penalty_weight': 0.2170524798465777}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 5.438382148742676, Std is: 9.311620712280273



[I 2025-02-12 18:07:50,942] Trial 13 finished with value: -7.3186140060424805 and parameters: {'reset_noise_scale': 0.1535938158989645, 'forward_reward_weight': 1.4918150314675807, 'ctrl_cost_weight': 0.3476860524590689, 'healthy_reward': 1.2759103268534133, 'contact_cost_weight': 0.000634613498450316, 'healthy_z_lower': 0.2450710760733038, 'healthy_z_upper': 1.1959405187927532, 'contact_force_min': -0.7146609125002726, 'contact_force_max': 0.7705082800714662, 'learning_rate': 0.0002199714546831595, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.9680095955235131, 'gae_lambda': 0.9754701796037066, 'clip_range': 0.33967335129864945, 'ent_coef': 0.07745081985864401, 'std_penalty_weight': 0.3048093775225276}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -5.075219631195068, Std is: 7.359990119934082



[I 2025-02-12 18:09:03,063] Trial 14 finished with value: 2.7459359169006348 and parameters: {'reset_noise_scale': 0.1303975028658479, 'forward_reward_weight': 0.7043783200640799, 'ctrl_cost_weight': 0.2202485348886163, 'healthy_reward': 1.0754309027296813, 'contact_cost_weight': 0.0004712861721391593, 'healthy_z_lower': 0.1972242064339443, 'healthy_z_upper': 1.105919178697288, 'contact_force_min': -0.7879831190435319, 'contact_force_max': 0.5139105649332982, 'learning_rate': 3.1268167577820096e-05, 'n_steps': 4096, 'batch_size': 4096, 'gamma': 0.9740046514993421, 'gae_lambda': 0.9395977176203617, 'clip_range': 0.3013486678086366, 'ent_coef': 0.05594108403889887, 'std_penalty_weight': 0.04459149277498503}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 2.998999834060669, Std is: 5.675162315368652



[I 2025-02-12 18:10:15,935] Trial 15 finished with value: -6.19373083114624 and parameters: {'reset_noise_scale': 0.07246132660380145, 'forward_reward_weight': 1.055870568281627, 'ctrl_cost_weight': 0.4224667216107877, 'healthy_reward': 0.8376261974362549, 'contact_cost_weight': 0.0002439902988765731, 'healthy_z_lower': 0.18013376788864677, 'healthy_z_upper': 1.0573079337190243, 'contact_force_min': -0.651488902751872, 'contact_force_max': 0.9935242066445633, 'learning_rate': 7.656763735260732e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9701536179096968, 'gae_lambda': 0.9725314037085521, 'clip_range': 0.3762534128528261, 'ent_coef': 0.03719489323502741, 'std_penalty_weight': 0.20859735326930828}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -4.4016523361206055, Std is: 8.591089248657227



[I 2025-02-12 18:11:27,310] Trial 16 finished with value: 2.636225938796997 and parameters: {'reset_noise_scale': 0.08025352356929435, 'forward_reward_weight': 1.279168983212516, 'ctrl_cost_weight': 0.21539420366525974, 'healthy_reward': 1.3629362564908647, 'contact_cost_weight': 0.0008069672113580686, 'healthy_z_lower': 0.10284700505016856, 'healthy_z_upper': 0.986259785888202, 'contact_force_min': -0.8726736883191395, 'contact_force_max': 0.8038912147581037, 'learning_rate': 0.000263191757727693, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9795963299512062, 'gae_lambda': 0.9431753073832824, 'clip_range': 0.32554081297213167, 'ent_coef': 0.08343534856034604, 'std_penalty_weight': 0.06376750659024133}. Best is trial 1 with value: 6.8863396644592285.


Mean is: 2.9567224979400635, Std is: 5.026017189025879



[I 2025-02-12 18:12:47,444] Trial 17 finished with value: -14.490020751953125 and parameters: {'reset_noise_scale': 0.1979000663227896, 'forward_reward_weight': 0.9622603214770503, 'ctrl_cost_weight': 0.4280098603407767, 'healthy_reward': 1.2024400838628653, 'contact_cost_weight': 0.0005042432252133529, 'healthy_z_lower': 0.23324464037371803, 'healthy_z_upper': 1.1471411046105713, 'contact_force_min': -0.7715242835752634, 'contact_force_max': 0.9111549270471161, 'learning_rate': 2.0825536705562142e-05, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.9667883728861725, 'gae_lambda': 0.907195850516993, 'clip_range': 0.3672532211894248, 'ent_coef': 0.06731866023710016, 'std_penalty_weight': 0.350332425348462}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -10.115646362304688, Std is: 12.486351013183594



[I 2025-02-12 18:13:55,879] Trial 18 finished with value: -5.984042644500732 and parameters: {'reset_noise_scale': 0.16951841331308265, 'forward_reward_weight': 0.8268503990087377, 'ctrl_cost_weight': 0.7900918293983825, 'healthy_reward': 1.056533459839554, 'contact_cost_weight': 0.00035916010476716816, 'healthy_z_lower': 0.12314629605641421, 'healthy_z_upper': 1.0002386052544439, 'contact_force_min': -0.6226147656087715, 'contact_force_max': 0.6010838341706395, 'learning_rate': 0.00011418581231541063, 'n_steps': 6144, 'batch_size': 4096, 'gamma': 0.9721630737195673, 'gae_lambda': 0.9676199941095456, 'clip_range': 0.34028347652446883, 'ent_coef': 0.09924139083574098, 'std_penalty_weight': 0.48522939042181923}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -2.786142587661743, Std is: 6.59049129486084



[I 2025-02-12 18:15:07,630] Trial 19 finished with value: -8.86286449432373 and parameters: {'reset_noise_scale': 0.1253482407973607, 'forward_reward_weight': 1.1703394546681793, 'ctrl_cost_weight': 0.4474429821862419, 'healthy_reward': 0.5269065142293184, 'contact_cost_weight': 0.00024436532584542776, 'healthy_z_lower': 0.19841164779701365, 'healthy_z_upper': 1.0866495134153806, 'contact_force_min': -0.7528376166718119, 'contact_force_max': 0.7349633424978622, 'learning_rate': 5.006748223464999e-05, 'n_steps': 4096, 'batch_size': 4096, 'gamma': 0.9753162296822696, 'gae_lambda': 0.9887918550869701, 'clip_range': 0.4271092138345127, 'ent_coef': 0.0433674671212092, 'std_penalty_weight': 0.17099335532593632}. Best is trial 1 with value: 6.8863396644592285.


Mean is: -6.93728494644165, Std is: 11.261137962341309



[I 2025-02-12 18:16:28,878] Trial 20 finished with value: 9.566394805908203 and parameters: {'reset_noise_scale': 0.16233013344361696, 'forward_reward_weight': 0.7373231996262023, 'ctrl_cost_weight': 0.2555301862439734, 'healthy_reward': 1.3934550795035356, 'contact_cost_weight': 0.0009592595531996295, 'healthy_z_lower': 0.16920348484172637, 'healthy_z_upper': 1.1571559199763854, 'contact_force_min': -0.8752060254821176, 'contact_force_max': 0.8128671208460907, 'learning_rate': 2.7491753660640618e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9700303259934325, 'gae_lambda': 0.9846904513543453, 'clip_range': 0.30211764849372763, 'ent_coef': 0.05858421226519457, 'std_penalty_weight': 0.04393666684201866}. Best is trial 20 with value: 9.566394805908203.


Mean is: 10.10147762298584, Std is: 12.178498268127441



[I 2025-02-12 18:17:47,858] Trial 21 finished with value: 9.950752258300781 and parameters: {'reset_noise_scale': 0.16520472354234506, 'forward_reward_weight': 0.7395338030794335, 'ctrl_cost_weight': 0.1804483460193953, 'healthy_reward': 1.393578758599989, 'contact_cost_weight': 0.0009934002142959779, 'healthy_z_lower': 0.1715020692243751, 'healthy_z_upper': 1.1486729403778595, 'contact_force_min': -0.8969710677896138, 'contact_force_max': 0.83451315433875, 'learning_rate': 1.9510726046565766e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9689082146329557, 'gae_lambda': 0.9827196209825627, 'clip_range': 0.30017297133464677, 'ent_coef': 0.061640745360882425, 'std_penalty_weight': 0.044288286771767126}. Best is trial 21 with value: 9.950752258300781.


Mean is: 10.566864013671875, Std is: 13.911383628845215



[I 2025-02-12 18:19:08,417] Trial 22 finished with value: 9.676883697509766 and parameters: {'reset_noise_scale': 0.15965988379189838, 'forward_reward_weight': 0.7231275474551455, 'ctrl_cost_weight': 0.27764566237515714, 'healthy_reward': 1.4965794569371094, 'contact_cost_weight': 0.000980556868790347, 'healthy_z_lower': 0.17020342176982337, 'healthy_z_upper': 1.1553835856167736, 'contact_force_min': -0.8902355390367098, 'contact_force_max': 0.8168433896914922, 'learning_rate': 2.024801595836438e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9687945714520126, 'gae_lambda': 0.997742731880986, 'clip_range': 0.3174305431349233, 'ent_coef': 0.05754832944994527, 'std_penalty_weight': 0.08399384520124308}. Best is trial 21 with value: 9.950752258300781.


Mean is: 10.7125883102417, Std is: 12.330723762512207



[I 2025-02-12 18:20:29,132] Trial 23 finished with value: 8.563589096069336 and parameters: {'reset_noise_scale': 0.1655095756240699, 'forward_reward_weight': 0.7365024072558367, 'ctrl_cost_weight': 0.289179397969748, 'healthy_reward': 1.4960688028946623, 'contact_cost_weight': 0.0009983494841726808, 'healthy_z_lower': 0.17989597797378692, 'healthy_z_upper': 1.162739861793851, 'contact_force_min': -0.8977318127537564, 'contact_force_max': 0.8214987140670795, 'learning_rate': 1.9043159094543316e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9693994441145151, 'gae_lambda': 0.9988199934581654, 'clip_range': 0.3323972246740897, 'ent_coef': 0.05578862994215228, 'std_penalty_weight': 0.07844435641128748}. Best is trial 21 with value: 9.950752258300781.


Mean is: 9.415812492370605, Std is: 10.864051818847656



[I 2025-02-12 18:21:47,824] Trial 24 finished with value: 8.354972839355469 and parameters: {'reset_noise_scale': 0.18896157519165294, 'forward_reward_weight': 0.5080820944124594, 'ctrl_cost_weight': 0.28746767281249097, 'healthy_reward': 1.4079809483475139, 'contact_cost_weight': 0.0009079489471024106, 'healthy_z_lower': 0.16806154060389075, 'healthy_z_upper': 1.176025252279481, 'contact_force_min': -0.898208080877962, 'contact_force_max': 0.7842044542774407, 'learning_rate': 1.9538893217752334e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9692034493037482, 'gae_lambda': 0.9859069778984532, 'clip_range': 0.3186854254957641, 'ent_coef': 0.023839712677443704, 'std_penalty_weight': 0.09922682825066448}. Best is trial 21 with value: 9.950752258300781.


Mean is: 9.510622024536133, Std is: 11.646540641784668



[I 2025-02-12 18:23:00,544] Trial 25 finished with value: -8.764734268188477 and parameters: {'reset_noise_scale': 0.16376223617146513, 'forward_reward_weight': 0.6623733499323008, 'ctrl_cost_weight': 0.4749435341635698, 'healthy_reward': 1.2033029367416703, 'contact_cost_weight': 0.0008807039206337044, 'healthy_z_lower': 0.15825128010266884, 'healthy_z_upper': 1.114303610898063, 'contact_force_min': -0.9934446892106928, 'contact_force_max': 0.9043891892563813, 'learning_rate': 3.350791135925115e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9711395847237668, 'gae_lambda': 0.9919956835130682, 'clip_range': 0.31712218654820346, 'ent_coef': 0.07307036144146659, 'std_penalty_weight': 0.1549769690631029}. Best is trial 21 with value: 9.950752258300781.


Mean is: -7.020813941955566, Std is: 11.25277042388916



[I 2025-02-12 18:24:15,973] Trial 26 finished with value: 6.790387153625488 and parameters: {'reset_noise_scale': 0.14146298947548788, 'forward_reward_weight': 0.7905866296746507, 'ctrl_cost_weight': 0.157693345146177, 'healthy_reward': 1.326747621523235, 'contact_cost_weight': 0.0007508025742619577, 'healthy_z_lower': 0.18790401935621553, 'healthy_z_upper': 1.1631120496297953, 'contact_force_min': -0.8198024696417148, 'contact_force_max': 0.8374432716766349, 'learning_rate': 1.0062267979340276e-05, 'n_steps': 6144, 'batch_size': 2048, 'gamma': 0.9670468922509408, 'gae_lambda': 0.9823082734696721, 'clip_range': 0.3529720043127139, 'ent_coef': 0.058686144452991765, 'std_penalty_weight': 0.2089664475347097}. Best is trial 21 with value: 9.950752258300781.


Mean is: 9.593409538269043, Std is: 13.413743019104004



[I 2025-02-12 18:25:31,609] Trial 27 finished with value: -3.6508023738861084 and parameters: {'reset_noise_scale': 0.17271763454837147, 'forward_reward_weight': 0.5935578446035532, 'ctrl_cost_weight': 0.36461665538853605, 'healthy_reward': 1.4269667525767615, 'contact_cost_weight': 0.0009389344874089658, 'healthy_z_lower': 0.2243112239805301, 'healthy_z_upper': 1.1317743403431182, 'contact_force_min': -0.941389180129457, 'contact_force_max': 0.7611798885630207, 'learning_rate': 1.5337020559069945e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9691484058692764, 'gae_lambda': 0.9997253009311484, 'clip_range': 0.3768776183721215, 'ent_coef': 0.04316649984095018, 'std_penalty_weight': 0.048077523826138065}. Best is trial 21 with value: 9.950752258300781.


Mean is: -3.3969995975494385, Std is: 5.279033184051514



[I 2025-02-12 18:26:36,644] Trial 28 finished with value: 0.03265903890132904 and parameters: {'reset_noise_scale': 0.15894193863984976, 'forward_reward_weight': 0.7172465739681145, 'ctrl_cost_weight': 0.2679509939966434, 'healthy_reward': 1.2444688610592551, 'contact_cost_weight': 0.000845462487303454, 'healthy_z_lower': 0.2965738401336277, 'healthy_z_upper': 1.0762087429819274, 'contact_force_min': -0.8721081275617195, 'contact_force_max': 0.9265882045310786, 'learning_rate': 2.6814397802675414e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9700578067656033, 'gae_lambda': 0.9673582155061048, 'clip_range': 0.34038916627892746, 'ent_coef': 0.08939302972815694, 'std_penalty_weight': 0.09684376964765892}. Best is trial 21 with value: 9.950752258300781.


Mean is: 0.19816572964191437, Std is: 1.7090070247650146



[I 2025-02-12 18:27:44,985] Trial 29 finished with value: -4.88953971862793 and parameters: {'reset_noise_scale': 0.13770675965314824, 'forward_reward_weight': 0.6086771917983256, 'ctrl_cost_weight': 0.6367757875173623, 'healthy_reward': 1.4356944974882437, 'contact_cost_weight': 0.0009947938407368453, 'healthy_z_lower': 0.12724989705976886, 'healthy_z_upper': 1.0201301875916022, 'contact_force_min': -0.9090176603323786, 'contact_force_max': 0.8792464153562648, 'learning_rate': 2.429728146747334e-05, 'n_steps': 6144, 'batch_size': 2048, 'gamma': 0.973509682478265, 'gae_lambda': 0.9639100236588457, 'clip_range': 0.3118137379419292, 'ent_coef': 0.0521483154423153, 'std_penalty_weight': 0.13283610253598688}. Best is trial 21 with value: 9.950752258300781.


Mean is: -3.8728177547454834, Std is: 7.65395975112915



[I 2025-02-12 18:28:57,563] Trial 30 finished with value: 6.104964256286621 and parameters: {'reset_noise_scale': 0.1899668161419444, 'forward_reward_weight': 0.7712807004988481, 'ctrl_cost_weight': 0.10201739981005738, 'healthy_reward': 1.359469122765042, 'contact_cost_weight': 0.0007590884970880425, 'healthy_z_lower': 0.15981738060718848, 'healthy_z_upper': 1.0990242167156803, 'contact_force_min': -0.5136918377121746, 'contact_force_max': 0.8150575195591178, 'learning_rate': 1.6169487011215644e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9675496720176742, 'gae_lambda': 0.9802374626885364, 'clip_range': 0.4433017363496666, 'ent_coef': 0.03190171687195285, 'std_penalty_weight': 0.053501018800960434}. Best is trial 21 with value: 9.950752258300781.


Mean is: 6.705286502838135, Std is: 11.220765113830566



[I 2025-02-12 18:30:17,095] Trial 31 finished with value: 9.063615798950195 and parameters: {'reset_noise_scale': 0.1651689368737248, 'forward_reward_weight': 0.7027632273513169, 'ctrl_cost_weight': 0.2795868042360422, 'healthy_reward': 1.4995938553994366, 'contact_cost_weight': 0.0009805716742181418, 'healthy_z_lower': 0.17201395707432976, 'healthy_z_upper': 1.1642944934397548, 'contact_force_min': -0.8882402766489682, 'contact_force_max': 0.8194229869602615, 'learning_rate': 1.9830605415085656e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9692311871166494, 'gae_lambda': 0.9945182367396626, 'clip_range': 0.3334473524800045, 'ent_coef': 0.058230140770598855, 'std_penalty_weight': 0.08572396247238467}. Best is trial 21 with value: 9.950752258300781.


Mean is: 10.092080116271973, Std is: 11.997400283813477



[I 2025-02-12 18:31:35,979] Trial 32 finished with value: 10.757169723510742 and parameters: {'reset_noise_scale': 0.15678250161289736, 'forward_reward_weight': 0.6690143028629072, 'ctrl_cost_weight': 0.1804943316596958, 'healthy_reward': 1.4339256757456236, 'contact_cost_weight': 0.0009171049629716507, 'healthy_z_lower': 0.1705091155166413, 'healthy_z_upper': 1.1735038100592123, 'contact_force_min': -0.8744689571315648, 'contact_force_max': 0.7405656710874401, 'learning_rate': 1.3310521649879398e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9714865325918901, 'gae_lambda': 0.9936569085820786, 'clip_range': 0.31145746948714936, 'ent_coef': 0.07146730988947003, 'std_penalty_weight': 0.08891122101758356}. Best is trial 32 with value: 10.757169723510742.


Mean is: 12.042339324951172, Std is: 14.454522132873535



[I 2025-02-12 18:32:53,522] Trial 33 finished with value: 8.83039379119873 and parameters: {'reset_noise_scale': 0.11898771519966425, 'forward_reward_weight': 0.8730123347716405, 'ctrl_cost_weight': 0.1801241862764007, 'healthy_reward': 1.3473680169824878, 'contact_cost_weight': 0.0009143966824570589, 'healthy_z_lower': 0.1881022204037473, 'healthy_z_upper': 1.133986294637153, 'contact_force_min': -0.9418762968570383, 'contact_force_max': 0.7335707243126912, 'learning_rate': 1.1925648772528483e-05, 'n_steps': 8192, 'batch_size': 1024, 'gamma': 0.9715617837172389, 'gae_lambda': 0.9871230442718821, 'clip_range': 0.3114964971312656, 'ent_coef': 0.07219724782868392, 'std_penalty_weight': 0.037025818583772414}. Best is trial 32 with value: 10.757169723510742.


Mean is: 9.283956527709961, Std is: 12.249900817871094



[I 2025-02-12 18:34:14,283] Trial 34 finished with value: -10.680872917175293 and parameters: {'reset_noise_scale': 0.1555682668496136, 'forward_reward_weight': 0.6368331255553398, 'ctrl_cost_weight': 0.38672975204893106, 'healthy_reward': 1.4160459692674143, 'contact_cost_weight': 0.0008678107743038099, 'healthy_z_lower': 0.21191268396127003, 'healthy_z_upper': 1.1995719288630422, 'contact_force_min': -0.8246636169742384, 'contact_force_max': 0.8597072216519258, 'learning_rate': 4.109564950290993e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9730015263564157, 'gae_lambda': 0.9912524646671033, 'clip_range': 0.4990843270396625, 'ent_coef': 0.07947384422558931, 'std_penalty_weight': 0.12846438166695934}. Best is trial 32 with value: 10.757169723510742.


Mean is: -9.343185424804688, Std is: 10.412904739379883



[I 2025-02-12 18:35:32,460] Trial 35 finished with value: 10.345540046691895 and parameters: {'reset_noise_scale': 0.17813602058118005, 'forward_reward_weight': 0.5527775805750166, 'ctrl_cost_weight': 0.17678954187450086, 'healthy_reward': 1.432568160623599, 'contact_cost_weight': 0.000797203188870266, 'healthy_z_lower': 0.15111784341805817, 'healthy_z_upper': 1.1220984467317359, 'contact_force_min': -0.8530338092424636, 'contact_force_max': 0.7790678378163115, 'learning_rate': 6.112379697466186e-05, 'n_steps': 8192, 'batch_size': 2048, 'gamma': 0.9713103963280777, 'gae_lambda': 0.9819315531774776, 'clip_range': 0.31118123371633694, 'ent_coef': 0.07015475634628873, 'std_penalty_weight': 0.023174121407812824}. Best is trial 32 with value: 10.757169723510742.


Mean is: 10.658203125, Std is: 13.491890907287598



[I 2025-02-12 18:36:46,270] Trial 36 finished with value: 8.005556106567383 and parameters: {'reset_noise_scale': 0.17590632874462833, 'forward_reward_weight': 0.5567209057761984, 'ctrl_cost_weight': 0.16926486580539452, 'healthy_reward': 1.4302831023083922, 'contact_cost_weight': 0.0006909573243870496, 'healthy_z_lower': 0.15330771268086935, 'healthy_z_upper': 1.1224477039572476, 'contact_force_min': -0.8577848361617888, 'contact_force_max': 0.6474962879538033, 'learning_rate': 3.7856117141289776e-05, 'n_steps': 6144, 'batch_size': 2048, 'gamma': 0.9712175184756054, 'gae_lambda': 0.9771273908482897, 'clip_range': 0.3123760987773466, 'ent_coef': 0.07027535918673224, 'std_penalty_weight': 0.01559298349953124}. Best is trial 32 with value: 10.757169723510742.


Mean is: 8.200026512145996, Std is: 12.47164249420166



[I 2025-02-12 18:38:06,019] Trial 37 finished with value: 11.180598258972168 and parameters: {'reset_noise_scale': 0.1837709087670397, 'forward_reward_weight': 0.582192045953415, 'ctrl_cost_weight': 0.16854474283919024, 'healthy_reward': 1.3282472292010847, 'contact_cost_weight': 0.0008100961183608296, 'healthy_z_lower': 0.13850574243255018, 'healthy_z_upper': 1.1841711319151458, 'contact_force_min': -0.9198925964716085, 'contact_force_max': 0.7138071978339429, 'learning_rate': 5.627695190789756e-05, 'n_steps': 8192, 'batch_size': 1024, 'gamma': 0.9661977659519276, 'gae_lambda': 0.9946025986910899, 'clip_range': 0.3273287692880437, 'ent_coef': 0.06287846815029899, 'std_penalty_weight': 0.02152125820728281}. Best is trial 37 with value: 11.180598258972168.


Mean is: 11.48532485961914, Std is: 14.159337997436523



[I 2025-02-12 18:39:22,531] Trial 38 finished with value: 9.702545166015625 and parameters: {'reset_noise_scale': 0.19921193257931225, 'forward_reward_weight': 0.5747564737139644, 'ctrl_cost_weight': 0.16334467036658076, 'healthy_reward': 1.3246946178228516, 'contact_cost_weight': 0.0007974468640666347, 'healthy_z_lower': 0.14466213688459312, 'healthy_z_upper': 1.1754680312138368, 'contact_force_min': -0.9225174435562582, 'contact_force_max': 0.7080871817087473, 'learning_rate': 6.388545482615238e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9655804804281427, 'gae_lambda': 0.9698047077651318, 'clip_range': 0.32902556535959254, 'ent_coef': 0.06341612449835378, 'std_penalty_weight': 0.004851063040678338}. Best is trial 37 with value: 11.180598258972168.


Mean is: 9.768925666809082, Std is: 13.683648109436035



[I 2025-02-12 18:40:41,169] Trial 39 finished with value: -13.206476211547852 and parameters: {'reset_noise_scale': 0.18869682552657652, 'forward_reward_weight': 0.5019023140827299, 'ctrl_cost_weight': 0.4986857026592866, 'healthy_reward': 1.1391640101586054, 'contact_cost_weight': 0.0007281580212774076, 'healthy_z_lower': 0.13913583584706116, 'healthy_z_upper': 1.1800275269192169, 'contact_force_min': -0.9626973338424802, 'contact_force_max': 0.745629228512031, 'learning_rate': 6.101247464091932e-05, 'n_steps': 8192, 'batch_size': 1024, 'gamma': 0.9661353264852187, 'gae_lambda': 0.9795861246885619, 'clip_range': 0.3440920371429937, 'ent_coef': 0.08240014528323905, 'std_penalty_weight': 0.033707784730352575}. Best is trial 37 with value: 11.180598258972168.


Mean is: -12.668145179748535, Std is: 15.970513343811035



[I 2025-02-12 18:41:58,207] Trial 40 finished with value: 5.4004292488098145 and parameters: {'reset_noise_scale': 0.1822897631958327, 'forward_reward_weight': 0.6557524721252309, 'ctrl_cost_weight': 0.21472200449244944, 'healthy_reward': 1.2211051525276826, 'contact_cost_weight': 0.0006366911569202172, 'healthy_z_lower': 0.11429143373172375, 'healthy_z_upper': 1.1380096307040046, 'contact_force_min': -0.8142401879868952, 'contact_force_max': 0.6752266161493294, 'learning_rate': 9.364526501801666e-05, 'n_steps': 8192, 'batch_size': 1024, 'gamma': 0.9727440734621514, 'gae_lambda': 0.9575753004705461, 'clip_range': 0.30943248701459247, 'ent_coef': 0.08997688713893595, 'std_penalty_weight': 0.001872223166935312}. Best is trial 37 with value: 11.180598258972168.


Mean is: 5.4158854484558105, Std is: 8.255569458007812



[I 2025-02-12 18:43:19,698] Trial 41 finished with value: 13.066815376281738 and parameters: {'reset_noise_scale': 0.19989514951613846, 'forward_reward_weight': 0.5707577709089633, 'ctrl_cost_weight': 0.17955228350005534, 'healthy_reward': 1.2994257238082392, 'contact_cost_weight': 0.000800540893826108, 'healthy_z_lower': 0.13585975546949003, 'healthy_z_upper': 1.1878525971388918, 'contact_force_min': -0.9286593046938745, 'contact_force_max': 0.7087331150618578, 'learning_rate': 5.4507680816756614e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9663943902421209, 'gae_lambda': 0.968225063684885, 'clip_range': 0.3266627005023337, 'ent_coef': 0.06355926416287785, 'std_penalty_weight': 0.023445997114355067}. Best is trial 41 with value: 13.066815376281738.


Mean is: 13.415358543395996, Std is: 14.86577320098877



[I 2025-02-12 18:44:38,639] Trial 42 finished with value: 11.839805603027344 and parameters: {'reset_noise_scale': 0.1841187973317828, 'forward_reward_weight': 0.5664982312487767, 'ctrl_cost_weight': 0.14341779883253603, 'healthy_reward': 1.3006267461747352, 'contact_cost_weight': 0.000795517604200478, 'healthy_z_lower': 0.13172456344285266, 'healthy_z_upper': 1.1849568952418172, 'contact_force_min': -0.9240300106122984, 'contact_force_max': 0.6847193974338495, 'learning_rate': 6.020769025881079e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9661052258985944, 'gae_lambda': 0.9931735920502346, 'clip_range': 0.3227829623317068, 'ent_coef': 0.06875766423827606, 'std_penalty_weight': 0.024512203602072444}. Best is trial 41 with value: 13.066815376281738.


Mean is: 12.201058387756348, Std is: 14.737654685974121



[I 2025-02-12 18:45:57,254] Trial 43 finished with value: 10.205840110778809 and parameters: {'reset_noise_scale': 0.19423531902711655, 'forward_reward_weight': 0.5537277471355461, 'ctrl_cost_weight': 0.14047069069917603, 'healthy_reward': 1.2908478264246235, 'contact_cost_weight': 0.0008002778058129336, 'healthy_z_lower': 0.13451371625510952, 'healthy_z_upper': 1.180660943707011, 'contact_force_min': -0.9522310432540448, 'contact_force_max': 0.6183322309726011, 'learning_rate': 0.0001541853427525238, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9660256821382934, 'gae_lambda': 0.9936939722971649, 'clip_range': 0.32932902887899423, 'ent_coef': 0.06923093664658611, 'std_penalty_weight': 0.023564921679317563}. Best is trial 41 with value: 13.066815376281738.


Mean is: 10.524849891662598, Std is: 13.537469863891602



[I 2025-02-12 18:47:17,824] Trial 44 finished with value: -8.701284408569336 and parameters: {'reset_noise_scale': 0.1852625027690458, 'forward_reward_weight': 0.611846076971542, 'ctrl_cost_weight': 0.21933395505886177, 'healthy_reward': 0.8033039021936126, 'contact_cost_weight': 0.0006964143981888994, 'healthy_z_lower': 0.12198911618544726, 'healthy_z_upper': 1.1898787120320478, 'contact_force_min': -0.9872709829212275, 'contact_force_max': 0.678421617039957, 'learning_rate': 5.7679829146092084e-05, 'n_steps': 4096, 'batch_size': 1024, 'gamma': 0.9666498990738372, 'gae_lambda': 0.9935615397580285, 'clip_range': 0.36062878769406037, 'ent_coef': 0.07619822285708955, 'std_penalty_weight': 0.06643918460098164}. Best is trial 41 with value: 13.066815376281738.


Mean is: -8.050012588500977, Std is: 9.802521705627441



[I 2025-02-12 18:48:31,534] Trial 45 finished with value: 6.230220317840576 and parameters: {'reset_noise_scale': 0.1790752748743304, 'forward_reward_weight': 0.6644686274058066, 'ctrl_cost_weight': 0.10130959984246746, 'healthy_reward': 1.301118418665202, 'contact_cost_weight': 0.0007775056594165887, 'healthy_z_lower': 0.14990501891472818, 'healthy_z_upper': 1.1174539818177243, 'contact_force_min': -0.9193677660423585, 'contact_force_max': 0.7187660059425617, 'learning_rate': 7.597387344442533e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9681941669256595, 'gae_lambda': 0.9749893655486532, 'clip_range': 0.3494826211573322, 'ent_coef': 0.004186365877273335, 'std_penalty_weight': 0.10592095762519474}. Best is trial 41 with value: 13.066815376281738.


Mean is: 7.467425346374512, Std is: 11.680456161499023



[I 2025-02-12 18:49:44,235] Trial 46 finished with value: -5.584789752960205 and parameters: {'reset_noise_scale': 0.19328099527878576, 'forward_reward_weight': 0.530240316075582, 'ctrl_cost_weight': 0.316495769942447, 'healthy_reward': 1.451534516196971, 'contact_cost_weight': 0.0008495132922647917, 'healthy_z_lower': 0.13183155760705823, 'healthy_z_upper': 1.177457346218853, 'contact_force_min': -0.8523059167843532, 'contact_force_max': 0.6847379043127646, 'learning_rate': 0.0006152515172593649, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9673430346167267, 'gae_lambda': 0.9880269785244931, 'clip_range': 0.32525964448374606, 'ent_coef': 0.07324002565244872, 'std_penalty_weight': 0.022965044849173923}. Best is trial 41 with value: 13.066815376281738.


Mean is: -5.369828224182129, Std is: 9.36037826538086



[I 2025-02-12 18:50:49,235] Trial 47 finished with value: 1.1120585203170776 and parameters: {'reset_noise_scale': 0.17280268857962688, 'forward_reward_weight': 0.5695813176436828, 'ctrl_cost_weight': 0.19629917779017494, 'healthy_reward': 1.2508514865697677, 'contact_cost_weight': 0.0006069947291186623, 'healthy_z_lower': 0.11750301933923947, 'healthy_z_upper': 0.8831427272650881, 'contact_force_min': -0.9250630240235097, 'contact_force_max': 0.7692514231049252, 'learning_rate': 4.840342134895338e-05, 'n_steps': 4096, 'batch_size': 1024, 'gamma': 0.97484018613045, 'gae_lambda': 0.9945358862658717, 'clip_range': 0.4130216920246827, 'ent_coef': 0.04812259043640562, 'std_penalty_weight': 0.07028460789238403}. Best is trial 41 with value: 13.066815376281738.


Mean is: 1.390495777130127, Std is: 3.961568593978882



[I 2025-02-12 18:52:14,027] Trial 48 finished with value: 11.199966430664062 and parameters: {'reset_noise_scale': 0.18081624832278828, 'forward_reward_weight': 0.6750329632211741, 'ctrl_cost_weight': 0.13612565898130996, 'healthy_reward': 0.9958918931505819, 'contact_cost_weight': 0.000693200742149278, 'healthy_z_lower': 0.14323902192292762, 'healthy_z_upper': 1.1975571720522709, 'contact_force_min': -0.9730710791773765, 'contact_force_max': 0.5503285728951934, 'learning_rate': 0.00012389571250041956, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9650478891635366, 'gae_lambda': 0.9496590980582424, 'clip_range': 0.3819088577251617, 'ent_coef': 0.06573115522742094, 'std_penalty_weight': 0.0007280846177700161}. Best is trial 41 with value: 13.066815376281738.


Mean is: 11.20824909210205, Std is: 11.376546859741211



[I 2025-02-12 18:53:20,164] Trial 49 finished with value: 0.7054859399795532 and parameters: {'reset_noise_scale': 0.18506036654983918, 'forward_reward_weight': 0.6763974199953253, 'ctrl_cost_weight': 0.13520479726877566, 'healthy_reward': 0.8582125920267659, 'contact_cost_weight': 0.0006963155170510303, 'healthy_z_lower': 0.13937498453214459, 'healthy_z_upper': 0.817522130025775, 'contact_force_min': -0.9774255301625472, 'contact_force_max': 0.5572805847637506, 'learning_rate': 0.0001534019884289347, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9650554732609362, 'gae_lambda': 0.9466187253464433, 'clip_range': 0.38741644479176945, 'ent_coef': 0.06518048120196504, 'std_penalty_weight': 0.0022637823566173214}. Best is trial 41 with value: 13.066815376281738.


Mean is: 0.7102017402648926, Std is: 2.0831515789031982



[I 2025-02-12 18:54:35,128] Trial 50 finished with value: -12.386798858642578 and parameters: {'reset_noise_scale': 0.14726404838869195, 'forward_reward_weight': 1.063641119263281, 'ctrl_cost_weight': 0.7596183320389405, 'healthy_reward': 1.0874081772396607, 'contact_cost_weight': 0.0005396103461804235, 'healthy_z_lower': 0.10255567948318224, 'healthy_z_upper': 1.1940994685354716, 'contact_force_min': -0.9604487266151076, 'contact_force_max': 0.5005291418976637, 'learning_rate': 0.00027942204221844205, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.965936169377975, 'gae_lambda': 0.9528289321231855, 'clip_range': 0.40675192379891756, 'ent_coef': 0.05117593335125273, 'std_penalty_weight': 0.26484294602818825}. Best is trial 41 with value: 13.066815376281738.


Mean is: -8.698099136352539, Std is: 13.927876472473145



[I 2025-02-12 18:55:51,137] Trial 51 finished with value: 5.780584335327148 and parameters: {'reset_noise_scale': 0.19978436321646173, 'forward_reward_weight': 0.5992251519406597, 'ctrl_cost_weight': 0.13871859919139412, 'healthy_reward': 1.005003653906809, 'contact_cost_weight': 0.0008144742380009374, 'healthy_z_lower': 0.15268125747901523, 'healthy_z_upper': 1.1462761614822043, 'contact_force_min': -0.9974811204761271, 'contact_force_max': 0.6492485343916956, 'learning_rate': 0.00011017392597873572, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9664192277495276, 'gae_lambda': 0.9359896843740301, 'clip_range': 0.3662225346906319, 'ent_coef': 0.06822103576299463, 'std_penalty_weight': 0.029324835915409627}. Best is trial 41 with value: 13.066815376281738.


Mean is: 6.060089111328125, Std is: 9.531325340270996



[I 2025-02-12 18:57:09,920] Trial 52 finished with value: -4.968439102172852 and parameters: {'reset_noise_scale': 0.18009065353330575, 'forward_reward_weight': 0.6321492776445465, 'ctrl_cost_weight': 0.24214813456435158, 'healthy_reward': 1.0029067447290103, 'contact_cost_weight': 0.0007260754985382905, 'healthy_z_lower': 0.12939851889681295, 'healthy_z_upper': 1.1696752466167821, 'contact_force_min': -0.9410354509846536, 'contact_force_max': 0.5518852015928877, 'learning_rate': 8.05398574558588e-05, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9656763056626317, 'gae_lambda': 0.9279617008611459, 'clip_range': 0.3355361209374231, 'ent_coef': 0.08016914263578999, 'std_penalty_weight': 0.02280848633777804}. Best is trial 41 with value: 13.066815376281738.


Mean is: -4.815789222717285, Std is: 6.692687511444092



[I 2025-02-12 18:58:31,127] Trial 53 finished with value: 9.219576835632324 and parameters: {'reset_noise_scale': 0.19393892494048232, 'forward_reward_weight': 0.528336499514868, 'ctrl_cost_weight': 0.19983549915655896, 'healthy_reward': 1.1680018478654808, 'contact_cost_weight': 0.0008933741183788118, 'healthy_z_lower': 0.14403136616702558, 'healthy_z_upper': 1.1975796695493477, 'contact_force_min': -0.833691852358039, 'contact_force_max': 0.7898652207612218, 'learning_rate': 0.00012914050163266447, 'n_steps': 4096, 'batch_size': 1024, 'gamma': 0.9769892577287452, 'gae_lambda': 0.9558499571891612, 'clip_range': 0.34656280160555814, 'ent_coef': 0.061910119576783, 'std_penalty_weight': 0.06383839039567296}. Best is trial 41 with value: 13.066815376281738.


Mean is: 9.930550575256348, Std is: 11.137091636657715



[I 2025-02-12 18:59:38,436] Trial 54 finished with value: -0.6938416361808777 and parameters: {'reset_noise_scale': 0.1723955922852225, 'forward_reward_weight': 0.5376375101908122, 'ctrl_cost_weight': 0.31438746566794246, 'healthy_reward': 1.3731655396027695, 'contact_cost_weight': 0.0008318506024908521, 'healthy_z_lower': 0.13688523940258138, 'healthy_z_upper': 0.966634588567282, 'contact_force_min': -0.912429701946776, 'contact_force_max': 0.6915077822891946, 'learning_rate': 6.829298197407333e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9669503546338775, 'gae_lambda': 0.9482398427099876, 'clip_range': 0.3218078499262498, 'ent_coef': 0.08493095975482648, 'std_penalty_weight': 0.11816990952191338}. Best is trial 41 with value: 13.066815376281738.


Mean is: -0.480073481798172, Std is: 1.8089898824691772



[I 2025-02-12 19:00:53,323] Trial 55 finished with value: -4.888595104217529 and parameters: {'reset_noise_scale': 0.18596914470587342, 'forward_reward_weight': 0.5780194648799414, 'ctrl_cost_weight': 0.23653716222386195, 'healthy_reward': 1.0505101333329199, 'contact_cost_weight': 0.000659019051613333, 'healthy_z_lower': 0.16150789481339806, 'healthy_z_upper': 1.0988393769028346, 'contact_force_min': -0.8642656754903735, 'contact_force_max': 0.7189768318196684, 'learning_rate': 9.634346933744625e-05, 'n_steps': 6144, 'batch_size': 512, 'gamma': 0.9650698118653465, 'gae_lambda': 0.9622387485544345, 'clip_range': 0.4525885577685129, 'ent_coef': 0.07455712287857594, 'std_penalty_weight': 0.3990324095238003}. Best is trial 41 with value: 13.066815376281738.


Mean is: -2.747750997543335, Std is: 5.36508846282959



[I 2025-02-12 19:02:14,554] Trial 56 finished with value: 10.34175968170166 and parameters: {'reset_noise_scale': 0.17870445005079155, 'forward_reward_weight': 0.8450321563571319, 'ctrl_cost_weight': 0.13633572341193897, 'healthy_reward': 0.9632420678199451, 'contact_cost_weight': 0.0007760282942954665, 'healthy_z_lower': 0.11116047378281024, 'healthy_z_upper': 1.184501690628662, 'contact_force_min': -0.9755839821802147, 'contact_force_max': 0.658317031735715, 'learning_rate': 4.7551920334763774e-05, 'n_steps': 4096, 'batch_size': 1024, 'gamma': 0.9719822753450827, 'gae_lambda': 0.9726624579345247, 'clip_range': 0.3083198388721716, 'ent_coef': 0.06566029039692993, 'std_penalty_weight': 0.051755851320722676}. Best is trial 41 with value: 13.066815376281738.


Mean is: 10.962969779968262, Std is: 12.00269889831543



[I 2025-02-12 19:03:26,151] Trial 57 finished with value: -8.468996047973633 and parameters: {'reset_noise_scale': 0.16978957449700277, 'forward_reward_weight': 0.6838517735779708, 'ctrl_cost_weight': 0.8853848997183345, 'healthy_reward': 1.3193179857820228, 'contact_cost_weight': 0.0009357673301079322, 'healthy_z_lower': 0.12126944483389092, 'healthy_z_upper': 1.147114140784678, 'contact_force_min': -0.7995887723685383, 'contact_force_max': 0.617243118878824, 'learning_rate': 0.0002183347375394224, 'n_steps': 8192, 'batch_size': 1024, 'gamma': 0.9795767667563068, 'gae_lambda': 0.9902828650647809, 'clip_range': 0.38159650792185207, 'ent_coef': 0.09509357454452394, 'std_penalty_weight': 0.17646445770518238}. Best is trial 41 with value: 13.066815376281738.


Mean is: -6.409414768218994, Std is: 11.671368598937988



[I 2025-02-12 19:04:41,914] Trial 58 finished with value: -8.206252098083496 and parameters: {'reset_noise_scale': 0.19417148523150352, 'forward_reward_weight': 0.7729847468220196, 'ctrl_cost_weight': 0.995233446668559, 'healthy_reward': 0.7249672979702165, 'contact_cost_weight': 0.000862303277590736, 'healthy_z_lower': 0.15408727831239738, 'healthy_z_upper': 1.1292767588708377, 'contact_force_min': -0.8816973934759751, 'contact_force_max': 0.7487651444347306, 'learning_rate': 3.683346135116832e-05, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9682620418783939, 'gae_lambda': 0.9431168855288956, 'clip_range': 0.36111752285698995, 'ent_coef': 0.07765533140005393, 'std_penalty_weight': 0.0202602239142577}. Best is trial 41 with value: 13.066815376281738.


Mean is: -7.950092792510986, Std is: 12.643471717834473



[I 2025-02-12 19:06:00,524] Trial 59 finished with value: 3.0662975311279297 and parameters: {'reset_noise_scale': 0.10858865287694919, 'forward_reward_weight': 0.6255045927302683, 'ctrl_cost_weight': 0.18990034415048856, 'healthy_reward': 0.9329200125305523, 'contact_cost_weight': 0.000712588028413332, 'healthy_z_lower': 0.14543218052179585, 'healthy_z_upper': 1.1563843769177482, 'contact_force_min': -0.7295862387828331, 'contact_force_max': 0.6294991987378126, 'learning_rate': 7.102642176911953e-05, 'n_steps': 6144, 'batch_size': 1024, 'gamma': 0.9676505042103927, 'gae_lambda': 0.9842626869492921, 'clip_range': 0.3213644020491712, 'ent_coef': 0.06171498298329973, 'std_penalty_weight': 0.08427012277447196}. Best is trial 41 with value: 13.066815376281738.


Mean is: 3.4838833808898926, Std is: 4.955323696136475



[I 2025-02-12 19:07:33,242] Trial 60 finished with value: 13.665643692016602 and parameters: {'reset_noise_scale': 0.15386361450227126, 'forward_reward_weight': 0.9544687139303828, 'ctrl_cost_weight': 0.2480265397261318, 'healthy_reward': 1.4674574426105578, 'contact_cost_weight': 0.0007673804202533016, 'healthy_z_lower': 0.12602825808322732, 'healthy_z_upper': 1.1998278030175065, 'contact_force_min': -0.9363891437900882, 'contact_force_max': 0.5512228513722864, 'learning_rate': 0.00013685635951938, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9664503619310055, 'gae_lambda': 0.9968603137766076, 'clip_range': 0.33665127323717237, 'ent_coef': 0.011870846742695465, 'std_penalty_weight': 0.15281092151002615}. Best is trial 60 with value: 13.665643692016602.


Mean is: 15.89486312866211, Std is: 14.588089942932129



[I 2025-02-12 19:09:04,350] Trial 61 finished with value: 15.7705717086792 and parameters: {'reset_noise_scale': 0.15370035147953226, 'forward_reward_weight': 0.9706881390864317, 'ctrl_cost_weight': 0.10120945933636091, 'healthy_reward': 1.4562160700861282, 'contact_cost_weight': 0.0007868894411066615, 'healthy_z_lower': 0.12613304007125511, 'healthy_z_upper': 1.1669762500307503, 'contact_force_min': -0.9271834761203445, 'contact_force_max': 0.5480427639544361, 'learning_rate': 0.00021144736754555933, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9664152641370989, 'gae_lambda': 0.9959009108986329, 'clip_range': 0.33671267677131667, 'ent_coef': 0.007571693543245717, 'std_penalty_weight': 0.19208942435729465}. Best is trial 61 with value: 15.7705717086792.


Mean is: 19.056856155395508, Std is: 17.108095169067383



[I 2025-02-12 19:10:44,802] Trial 62 finished with value: 17.296598434448242 and parameters: {'reset_noise_scale': 0.13235455116441758, 'forward_reward_weight': 1.00318575163628, 'ctrl_cost_weight': 0.10209529189321805, 'healthy_reward': 1.4684854532154026, 'contact_cost_weight': 0.000756505654140487, 'healthy_z_lower': 0.12600385631179348, 'healthy_z_upper': 1.1994287122279428, 'contact_force_min': -0.9506150832939143, 'contact_force_max': 0.5419392353226989, 'learning_rate': 0.0001940946161213782, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9664255522305345, 'gae_lambda': 0.9967085917159132, 'clip_range': 0.33688146119037476, 'ent_coef': 0.008811312359304657, 'std_penalty_weight': 0.15102752596708147}. Best is trial 62 with value: 17.296598434448242.


Mean is: 19.88080596923828, Std is: 17.110837936401367



[I 2025-02-12 19:12:27,016] Trial 63 finished with value: 16.71560287475586 and parameters: {'reset_noise_scale': 0.11716621165632601, 'forward_reward_weight': 1.012871121160651, 'ctrl_cost_weight': 0.10323953363591787, 'healthy_reward': 1.4680807812808767, 'contact_cost_weight': 0.0007493880695896277, 'healthy_z_lower': 0.1096786448255295, 'healthy_z_upper': 1.1998988090490148, 'contact_force_min': -0.9496743171478493, 'contact_force_max': 0.5423993428168885, 'learning_rate': 0.0002079502422915761, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9664578078940261, 'gae_lambda': 0.9967978335154756, 'clip_range': 0.3395214556500478, 'ent_coef': 0.009373131052084436, 'std_penalty_weight': 0.19087285459522502}. Best is trial 62 with value: 17.296598434448242.


Mean is: 19.881893157958984, Std is: 16.58847427368164



[I 2025-02-12 19:14:03,920] Trial 64 finished with value: 16.471939086914062 and parameters: {'reset_noise_scale': 0.11799372635503305, 'forward_reward_weight': 0.9581012515313924, 'ctrl_cost_weight': 0.12762718333545964, 'healthy_reward': 1.462241420045984, 'contact_cost_weight': 0.0007513871171693201, 'healthy_z_lower': 0.1096727509709976, 'healthy_z_upper': 1.199178855253361, 'contact_force_min': -0.9575411301210038, 'contact_force_max': 0.5338737408326883, 'learning_rate': 0.0002994715706541376, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9671186594813527, 'gae_lambda': 0.9976075980397114, 'clip_range': 0.337709175390256, 'ent_coef': 0.010350173493242623, 'std_penalty_weight': 0.1908972235125843}. Best is trial 62 with value: 17.296598434448242.


Mean is: 19.67957305908203, Std is: 16.802936553955078



[I 2025-02-12 19:15:41,941] Trial 65 finished with value: 14.180273056030273 and parameters: {'reset_noise_scale': 0.11632443058247806, 'forward_reward_weight': 0.9998011995359586, 'ctrl_cost_weight': 0.10587862201549589, 'healthy_reward': 1.4659798918049747, 'contact_cost_weight': 0.0006638236884655093, 'healthy_z_lower': 0.10737182708761756, 'healthy_z_upper': 1.1665723188231687, 'contact_force_min': -0.9468330142162119, 'contact_force_max': 0.5309369510516948, 'learning_rate': 0.00035851210381591513, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9671334063810331, 'gae_lambda': 0.9989391932936309, 'clip_range': 0.33746888517312845, 'ent_coef': 0.010495793238507668, 'std_penalty_weight': 0.19225549390423374}. Best is trial 62 with value: 17.296598434448242.


Mean is: 17.383073806762695, Std is: 16.659082412719727



[I 2025-02-12 19:17:22,902] Trial 66 finished with value: 16.028106689453125 and parameters: {'reset_noise_scale': 0.09712809996094746, 'forward_reward_weight': 1.009173107415903, 'ctrl_cost_weight': 0.10263302264758649, 'healthy_reward': 1.4625930284695496, 'contact_cost_weight': 0.0006549812422819879, 'healthy_z_lower': 0.10629605541708492, 'healthy_z_upper': 1.1654065854158238, 'contact_force_min': -0.9526865556299281, 'contact_force_max': 0.5277977296212731, 'learning_rate': 0.00037489059017362065, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9672401526716291, 'gae_lambda': 0.9997450448618065, 'clip_range': 0.3397469764588458, 'ent_coef': 0.010045244672708885, 'std_penalty_weight': 0.18779757678836909}. Best is trial 62 with value: 17.296598434448242.


Mean is: 19.287981033325195, Std is: 17.35845184326172



[I 2025-02-12 19:18:56,538] Trial 67 finished with value: 12.536685943603516 and parameters: {'reset_noise_scale': 0.09838946742346391, 'forward_reward_weight': 1.0084314319830678, 'ctrl_cost_weight': 0.10180182980245411, 'healthy_reward': 1.464290285942405, 'contact_cost_weight': 0.0006100727102910768, 'healthy_z_lower': 0.10759572264042608, 'healthy_z_upper': 1.1632698339204914, 'contact_force_min': -0.9540849511833926, 'contact_force_max': 0.5308915612837752, 'learning_rate': 0.00037064144048648816, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9671051555885293, 'gae_lambda': 0.9976311144135023, 'clip_range': 0.33711223145445013, 'ent_coef': 0.009767370279855804, 'std_penalty_weight': 0.19247297920839407}. Best is trial 62 with value: 17.296598434448242.


Mean is: 15.620559692382812, Std is: 16.022369384765625



[I 2025-02-12 19:20:28,613] Trial 68 finished with value: 11.540423393249512 and parameters: {'reset_noise_scale': 0.11903689987160401, 'forward_reward_weight': 0.9476554649977615, 'ctrl_cost_weight': 0.11922602099173846, 'healthy_reward': 1.4736208495828833, 'contact_cost_weight': 0.0006451284115181368, 'healthy_z_lower': 0.10076136831896751, 'healthy_z_upper': 1.1495848333045646, 'contact_force_min': -0.9416444741332904, 'contact_force_max': 0.5778771964933381, 'learning_rate': 0.0003296409954047428, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9684688190535791, 'gae_lambda': 0.999283234801297, 'clip_range': 0.3517560966878278, 'ent_coef': 0.01433312492006591, 'std_penalty_weight': 0.23888076349611004}. Best is trial 62 with value: 17.296598434448242.


Mean is: 15.391730308532715, Std is: 16.122299194335938



[I 2025-02-12 19:21:45,350] Trial 69 finished with value: 2.4183077812194824 and parameters: {'reset_noise_scale': 0.1332245924711505, 'forward_reward_weight': 1.1040775759571748, 'ctrl_cost_weight': 0.10018416113009251, 'healthy_reward': 1.3933887836425591, 'contact_cost_weight': 0.0005601351576886284, 'healthy_z_lower': 0.10696239912218762, 'healthy_z_upper': 0.9228266500390551, 'contact_force_min': -0.9997017193145065, 'contact_force_max': 0.5263023673484096, 'learning_rate': 0.000550833895170285, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.967924487162343, 'gae_lambda': 0.9972227849250006, 'clip_range': 0.3455361458408812, 'ent_coef': 0.01982867849712623, 'std_penalty_weight': 0.1504778556555209}. Best is trial 62 with value: 17.296598434448242.


Mean is: 3.4043657779693604, Std is: 6.552844047546387



[I 2025-02-12 19:23:23,888] Trial 70 finished with value: 15.721816062927246 and parameters: {'reset_noise_scale': 0.11530851018119033, 'forward_reward_weight': 0.9853731412928372, 'ctrl_cost_weight': 0.11876131013843658, 'healthy_reward': 1.4762341001196546, 'contact_cost_weight': 0.0007423674534085177, 'healthy_z_lower': 0.11427630542288064, 'healthy_z_upper': 1.167560558435178, 'contact_force_min': -0.9037385612071778, 'contact_force_max': 0.5756093487779126, 'learning_rate': 0.0002136598723235309, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9674424196292232, 'gae_lambda': 0.9894075559527521, 'clip_range': 0.35837721593409466, 'ent_coef': 5.0438920475445737e-05, 'std_penalty_weight': 0.23042930637937198}. Best is trial 62 with value: 17.296598434448242.


Mean is: 19.56256866455078, Std is: 16.667814254760742



[I 2025-02-12 19:25:05,457] Trial 71 finished with value: 18.305683135986328 and parameters: {'reset_noise_scale': 0.11580375720612926, 'forward_reward_weight': 0.99934560343593, 'ctrl_cost_weight': 0.1229576240208839, 'healthy_reward': 1.4661833420831654, 'contact_cost_weight': 0.0007414372869111981, 'healthy_z_lower': 0.11531819070449956, 'healthy_z_upper': 1.1672915815674416, 'contact_force_min': -0.9014092588121949, 'contact_force_max': 0.5750114508532531, 'learning_rate': 0.0001888971340883665, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9674200836148498, 'gae_lambda': 0.9964137381141676, 'clip_range': 0.3586764695541677, 'ent_coef': 4.669532240675865e-05, 'std_penalty_weight': 0.23282019240423632}. Best is trial 71 with value: 18.305683135986328.


Mean is: 22.212881088256836, Std is: 16.78203773498535



[I 2025-02-12 19:26:41,663] Trial 72 finished with value: 13.503702163696289 and parameters: {'reset_noise_scale': 0.11598600863999342, 'forward_reward_weight': 1.0070154151692794, 'ctrl_cost_weight': 0.12427101655423478, 'healthy_reward': 1.461480265250419, 'contact_cost_weight': 0.0006688524078419225, 'healthy_z_lower': 0.11589457530758461, 'healthy_z_upper': 1.1646614565186624, 'contact_force_min': -0.8998140867235097, 'contact_force_max': 0.5759146352398281, 'learning_rate': 0.0001969918791966286, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9672969859463234, 'gae_lambda': 0.9901342663607121, 'clip_range': 0.3575352445246312, 'ent_coef': 0.001339955485122542, 'std_penalty_weight': 0.23261457677352657}. Best is trial 71 with value: 18.305683135986328.


Mean is: 17.284440994262695, Std is: 16.25323486328125



[I 2025-02-12 19:28:10,077] Trial 73 finished with value: 9.73786735534668 and parameters: {'reset_noise_scale': 0.09392917754516246, 'forward_reward_weight': 0.917365801993593, 'ctrl_cost_weight': 0.15392365455106854, 'healthy_reward': 1.3974821031989144, 'contact_cost_weight': 0.0007403982551312273, 'healthy_z_lower': 0.10912706889754238, 'healthy_z_upper': 1.1396184857096177, 'contact_force_min': -0.952988154405765, 'contact_force_max': 0.5314392300156235, 'learning_rate': 0.0002742724784212753, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9668195890458814, 'gae_lambda': 0.9998882449786828, 'clip_range': 0.3672450918668012, 'ent_coef': 0.008238921472331424, 'std_penalty_weight': 0.27268647845312255}. Best is trial 71 with value: 18.305683135986328.


Mean is: 13.698766708374023, Std is: 14.525471687316895



[I 2025-02-12 19:29:31,323] Trial 74 finished with value: -3.1557846069335938 and parameters: {'reset_noise_scale': 0.12499626367383701, 'forward_reward_weight': 0.982771963199629, 'ctrl_cost_weight': 0.5919345067392732, 'healthy_reward': 1.4912643874768343, 'contact_cost_weight': 0.0007476661051509826, 'healthy_z_lower': 0.11619224702415999, 'healthy_z_upper': 1.042120535501144, 'contact_force_min': -0.9846489480855642, 'contact_force_max': 0.5935534536634942, 'learning_rate': 0.00018094248030501412, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9678818546741212, 'gae_lambda': 0.9965450534762823, 'clip_range': 0.34060903360485545, 'ent_coef': 0.0008202566972709465, 'std_penalty_weight': 0.1863137858274195}. Best is trial 71 with value: 18.305683135986328.


Mean is: -2.290532350540161, Std is: 4.644059658050537



[I 2025-02-12 19:31:04,433] Trial 75 finished with value: 15.857231140136719 and parameters: {'reset_noise_scale': 0.10407916517213472, 'forward_reward_weight': 1.1001778795622388, 'ctrl_cost_weight': 0.12206810760204634, 'healthy_reward': 1.3720887143207987, 'contact_cost_weight': 0.0006036659948829817, 'healthy_z_lower': 0.12086537176061653, 'healthy_z_upper': 1.1714434828791158, 'contact_force_min': -0.9069580704957153, 'contact_force_max': 0.5005580837414764, 'learning_rate': 0.000430123134441096, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9655509896610168, 'gae_lambda': 0.9865807532810572, 'clip_range': 0.35474981708640607, 'ent_coef': 0.005428479597021323, 'std_penalty_weight': 0.21689048040861375}. Best is trial 71 with value: 18.305683135986328.


Mean is: 19.5974063873291, Std is: 17.24453353881836



[I 2025-02-12 19:32:37,567] Trial 76 finished with value: 11.828100204467773 and parameters: {'reset_noise_scale': 0.08451667476938936, 'forward_reward_weight': 1.0897151321339855, 'ctrl_cost_weight': 0.21636931416197425, 'healthy_reward': 1.3557344908661486, 'contact_cost_weight': 0.0005959374549855704, 'healthy_z_lower': 0.12117050695278472, 'healthy_z_upper': 1.1544343032595656, 'contact_force_min': -0.9029700830150453, 'contact_force_max': 0.5013083256389661, 'learning_rate': 0.0004505453709136096, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9655135397179482, 'gae_lambda': 0.987651716407168, 'clip_range': 0.35456115495720375, 'ent_coef': 0.005606656675691086, 'std_penalty_weight': 0.22059131195408746}. Best is trial 71 with value: 18.305683135986328.


Mean is: 15.071627616882324, Std is: 14.703786849975586



[I 2025-02-12 19:34:01,967] Trial 77 finished with value: 4.784501075744629 and parameters: {'reset_noise_scale': 0.10168242328586342, 'forward_reward_weight': 1.1639017861834158, 'ctrl_cost_weight': 0.1547696250514024, 'healthy_reward': 1.4125898887653277, 'contact_cost_weight': 0.000522246372000117, 'healthy_z_lower': 0.11286195328992185, 'healthy_z_upper': 1.105958968599848, 'contact_force_min': -0.9657910034099261, 'contact_force_max': 0.5732173195633948, 'learning_rate': 0.0002403026307020081, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9655452487988184, 'gae_lambda': 0.984880792799967, 'clip_range': 0.34707513187095806, 'ent_coef': 0.019642580669381772, 'std_penalty_weight': 0.30751430216992826}. Best is trial 71 with value: 18.305683135986328.


Mean is: 8.214669227600098, Std is: 11.154500007629395



[I 2025-02-12 19:35:32,227] Trial 78 finished with value: 9.171524047851562 and parameters: {'reset_noise_scale': 0.10690480177479125, 'forward_reward_weight': 1.035470369047996, 'ctrl_cost_weight': 0.12261029292362872, 'healthy_reward': 1.3765278352174473, 'contact_cost_weight': 0.0005810050250053779, 'healthy_z_lower': 0.12686329988799885, 'healthy_z_upper': 1.0787128466570497, 'contact_force_min': -0.6703038765765807, 'contact_force_max': 0.5198902577862402, 'learning_rate': 0.0003190386238380397, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.96871441095472, 'gae_lambda': 0.9908225932011129, 'clip_range': 0.3703152993278814, 'ent_coef': 0.006047243123016274, 'std_penalty_weight': 0.20568937491490993}. Best is trial 71 with value: 18.305683135986328.


Mean is: 11.915002822875977, Std is: 13.337972640991211



[I 2025-02-12 19:37:07,664] Trial 79 finished with value: 17.373775482177734 and parameters: {'reset_noise_scale': 0.11132768410287176, 'forward_reward_weight': 0.912184591840552, 'ctrl_cost_weight': 0.1999466690002768, 'healthy_reward': 1.4434167094009687, 'contact_cost_weight': 0.000623963235257029, 'healthy_z_lower': 0.11938012268178326, 'healthy_z_upper': 1.1736715032884355, 'contact_force_min': -0.6089108844017048, 'contact_force_max': 0.5634137195273083, 'learning_rate': 0.0006547573612588057, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9677044288742772, 'gae_lambda': 0.9124680570589989, 'clip_range': 0.3618436900122911, 'ent_coef': 0.015689232341307265, 'std_penalty_weight': 0.25337921494563076}. Best is trial 71 with value: 18.305683135986328.


Mean is: 21.831565856933594, Std is: 17.593351364135742



[I 2025-02-12 19:38:23,244] Trial 80 finished with value: 2.3082804679870605 and parameters: {'reset_noise_scale': 0.12392343225845687, 'forward_reward_weight': 0.910988639353619, 'ctrl_cost_weight': 0.15586792815713468, 'healthy_reward': 1.4437307519835487, 'contact_cost_weight': 0.000620911636841792, 'healthy_z_lower': 0.2551761532504008, 'healthy_z_upper': 1.1749387237019824, 'contact_force_min': -0.615333846245377, 'contact_force_max': 0.5432587538162023, 'learning_rate': 0.0009047080102548397, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9667471399017924, 'gae_lambda': 0.9165243088877886, 'clip_range': 0.37253055790238915, 'ent_coef': 0.016127548196523095, 'std_penalty_weight': 0.2533213175512635}. Best is trial 71 with value: 18.305683135986328.


Mean is: 2.9413883686065674, Std is: 2.4992291927337646



[I 2025-02-12 19:40:02,504] Trial 81 finished with value: 22.14696502685547 and parameters: {'reset_noise_scale': 0.11222004449907154, 'forward_reward_weight': 1.035482510225517, 'ctrl_cost_weight': 0.1983293812728689, 'healthy_reward': 1.491845361544792, 'contact_cost_weight': 0.0007018854446460566, 'healthy_z_lower': 0.10058987038355743, 'healthy_z_upper': 1.1886597828205987, 'contact_force_min': -0.5321536524032018, 'contact_force_max': 0.5131211683892082, 'learning_rate': 0.0007395832256367282, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9676087870111953, 'gae_lambda': 0.9109115140551849, 'clip_range': 0.36432731973474336, 'ent_coef': 0.003238452586407573, 'std_penalty_weight': 0.22046945240166685}. Best is trial 81 with value: 22.14696502685547.


Mean is: 26.15064811706543, Std is: 18.15981101989746



[I 2025-02-12 19:41:42,294] Trial 82 finished with value: 21.551406860351562 and parameters: {'reset_noise_scale': 0.11177178805966884, 'forward_reward_weight': 1.0352341776220861, 'ctrl_cost_weight': 0.2067964840031068, 'healthy_reward': 1.410153714402554, 'contact_cost_weight': 0.0006641462555331876, 'healthy_z_lower': 0.10193922937726387, 'healthy_z_upper': 1.1895033038096123, 'contact_force_min': -0.562172699033048, 'contact_force_max': 0.5162638253166749, 'learning_rate': 0.000763796786561699, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.965810893879808, 'gae_lambda': 0.9079589779521706, 'clip_range': 0.3640265273774152, 'ent_coef': 0.0033698273539095524, 'std_penalty_weight': 0.2056260916321625}. Best is trial 81 with value: 22.14696502685547.


Mean is: 24.89508056640625, Std is: 16.260936737060547



[I 2025-02-12 19:43:25,924] Trial 83 finished with value: 25.946252822875977 and parameters: {'reset_noise_scale': 0.11110367405657634, 'forward_reward_weight': 1.036774461213155, 'ctrl_cost_weight': 0.19597892537159195, 'healthy_reward': 1.4113971157691336, 'contact_cost_weight': 0.0006739176107985801, 'healthy_z_lower': 0.10346917564464367, 'healthy_z_upper': 1.1858071139774944, 'contact_force_min': -0.5345124053962315, 'contact_force_max': 0.5091997232840131, 'learning_rate': 0.0007224515890525199, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9658102857483557, 'gae_lambda': 0.909407758476119, 'clip_range': 0.3754293134436752, 'ent_coef': 0.0032669639885243087, 'std_penalty_weight': 0.16648352217887086}. Best is trial 83 with value: 25.946252822875977.


Mean is: 28.71955108642578, Std is: 16.658092498779297



[I 2025-02-12 19:45:03,332] Trial 84 finished with value: 17.570796966552734 and parameters: {'reset_noise_scale': 0.11169252323839532, 'forward_reward_weight': 1.04759093942817, 'ctrl_cost_weight': 0.2637064517783092, 'healthy_reward': 1.4189937435976543, 'contact_cost_weight': 0.0006763857249478091, 'healthy_z_lower': 0.10049560305618414, 'healthy_z_upper': 1.1894539621175315, 'contact_force_min': -0.5346948993185435, 'contact_force_max': 0.5138778224327248, 'learning_rate': 0.0007548056543903635, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9695061179320921, 'gae_lambda': 0.9092461174245565, 'clip_range': 0.3634927671159471, 'ent_coef': 0.01264743109478511, 'std_penalty_weight': 0.16430424441192362}. Best is trial 83 with value: 25.946252822875977.


Mean is: 20.188026428222656, Std is: 15.929167747497559



[I 2025-02-12 19:46:42,248] Trial 85 finished with value: 18.2468318939209 and parameters: {'reset_noise_scale': 0.1308409128251381, 'forward_reward_weight': 1.1367324076734586, 'ctrl_cost_weight': 0.19936844544067744, 'healthy_reward': 1.4124547988814946, 'contact_cost_weight': 0.0007173656451646176, 'healthy_z_lower': 0.10303149813841521, 'healthy_z_upper': 1.1858965463142142, 'contact_force_min': -0.5365669989887883, 'contact_force_max': 0.5132576100583389, 'learning_rate': 0.0006951858912222554, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9696341762994303, 'gae_lambda': 0.9069394082048752, 'clip_range': 0.3976419079988186, 'ent_coef': 0.014071937164129352, 'std_penalty_weight': 0.1648410397128397}. Best is trial 83 with value: 25.946252822875977.


Mean is: 20.911460876464844, Std is: 16.164838790893555



[I 2025-02-12 19:48:15,725] Trial 86 finished with value: 10.531303405761719 and parameters: {'reset_noise_scale': 0.11059580235193238, 'forward_reward_weight': 1.2118186090786254, 'ctrl_cost_weight': 0.2628654807312987, 'healthy_reward': 1.4177962275148142, 'contact_cost_weight': 0.0006780506707633089, 'healthy_z_lower': 0.10044219351876153, 'healthy_z_upper': 1.1864303389827988, 'contact_force_min': -0.5234528972944807, 'contact_force_max': 0.5121671192074394, 'learning_rate': 0.0006857984279455359, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.969473779044249, 'gae_lambda': 0.9087254406241282, 'clip_range': 0.3928224726592879, 'ent_coef': 0.025571387355938113, 'std_penalty_weight': 0.1341543779291321}. Best is trial 83 with value: 25.946252822875977.


Mean is: 12.104270935058594, Std is: 11.725051879882812



[I 2025-02-12 19:50:00,512] Trial 87 finished with value: 19.157926559448242 and parameters: {'reset_noise_scale': 0.12980885373637135, 'forward_reward_weight': 1.036623427760322, 'ctrl_cost_weight': 0.2970661672283034, 'healthy_reward': 1.3480362921337146, 'contact_cost_weight': 0.0006387287744707708, 'healthy_z_lower': 0.104390984852025, 'healthy_z_upper': 1.1867456051356375, 'contact_force_min': -0.5502916262898483, 'contact_force_max': 0.5645486089147227, 'learning_rate': 0.000808209531973464, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9705666464854377, 'gae_lambda': 0.9088512255977715, 'clip_range': 0.37772161953475414, 'ent_coef': 0.0031810767964948593, 'std_penalty_weight': 0.17081472410292087}. Best is trial 83 with value: 25.946252822875977.


Mean is: 21.3232479095459, Std is: 12.676434516906738



[I 2025-02-12 19:51:32,976] Trial 88 finished with value: 11.003880500793457 and parameters: {'reset_noise_scale': 0.13211762924990716, 'forward_reward_weight': 1.1398579968299076, 'ctrl_cost_weight': 0.29536348696413806, 'healthy_reward': 1.3431238034455948, 'contact_cost_weight': 0.0007123922879047575, 'healthy_z_lower': 0.10011167244943286, 'healthy_z_upper': 1.1835190526050712, 'contact_force_min': -0.5517781566020813, 'contact_force_max': 0.5145063442101248, 'learning_rate': 0.0008133241129956195, 'n_steps': 8192, 'batch_size': 4096, 'gamma': 0.969704863373396, 'gae_lambda': 0.9118063338129034, 'clip_range': 0.3972267877602212, 'ent_coef': 0.0026664218126671033, 'std_penalty_weight': 0.1641367770133086}. Best is trial 83 with value: 25.946252822875977.


Mean is: 12.840595245361328, Std is: 11.190145492553711



[I 2025-02-12 19:53:14,742] Trial 89 finished with value: 16.155500411987305 and parameters: {'reset_noise_scale': 0.12816736015006594, 'forward_reward_weight': 1.0635717422207718, 'ctrl_cost_weight': 0.3609332839119772, 'healthy_reward': 1.4007404694396028, 'contact_cost_weight': 0.0006295399325033088, 'healthy_z_lower': 0.10428752323467026, 'healthy_z_upper': 1.188833910675789, 'contact_force_min': -0.5782510251118954, 'contact_force_max': 0.5589874085792434, 'learning_rate': 0.0007989525659579108, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9702565257325769, 'gae_lambda': 0.9028359947720141, 'clip_range': 0.37947112445094816, 'ent_coef': 0.013383762630786259, 'std_penalty_weight': 0.17126023983069977}. Best is trial 83 with value: 25.946252822875977.


Mean is: 17.975147247314453, Std is: 10.625035285949707



[I 2025-02-12 19:54:53,835] Trial 90 finished with value: 18.212207794189453 and parameters: {'reset_noise_scale': 0.11269438758408928, 'forward_reward_weight': 1.1227107221315933, 'ctrl_cost_weight': 0.20573674866477976, 'healthy_reward': 1.4982525582572261, 'contact_cost_weight': 0.0007254670641449998, 'healthy_z_lower': 0.11823987124711999, 'healthy_z_upper': 1.155067266831744, 'contact_force_min': -0.5394499088764224, 'contact_force_max': 0.5630958766186284, 'learning_rate': 0.0009714453396319545, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9688772243949085, 'gae_lambda': 0.9219662178367445, 'clip_range': 0.38805929454351235, 'ent_coef': 0.0032756095166110964, 'std_penalty_weight': 0.2817886580973374}. Best is trial 83 with value: 25.946252822875977.


Mean is: 23.106904983520508, Std is: 17.37010383605957



[I 2025-02-12 19:56:38,247] Trial 91 finished with value: 21.370126724243164 and parameters: {'reset_noise_scale': 0.12168029901317387, 'forward_reward_weight': 1.0393283918017109, 'ctrl_cost_weight': 0.20645184604134587, 'healthy_reward': 1.4342676277246724, 'contact_cost_weight': 0.0007117403192614717, 'healthy_z_lower': 0.1050272863896761, 'healthy_z_upper': 1.1542009711569865, 'contact_force_min': -0.5379577967912584, 'contact_force_max': 0.566087497937404, 'learning_rate': 0.0005238198829047628, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.969796951016399, 'gae_lambda': 0.9225657500312276, 'clip_range': 0.38798161906942724, 'ent_coef': 0.0036872829567439315, 'std_penalty_weight': 0.2932314745997482}. Best is trial 83 with value: 25.946252822875977.


Mean is: 26.17017364501953, Std is: 16.369482040405273



[I 2025-02-12 19:58:16,919] Trial 92 finished with value: 16.778017044067383 and parameters: {'reset_noise_scale': 0.11207487080405261, 'forward_reward_weight': 1.0347383605074392, 'ctrl_cost_weight': 0.20547560925192543, 'healthy_reward': 1.427068049647556, 'contact_cost_weight': 0.0007200665414800242, 'healthy_z_lower': 0.11768307805088707, 'healthy_z_upper': 1.1522444972751937, 'contact_force_min': -0.5406200544597317, 'contact_force_max': 0.5634365730047454, 'learning_rate': 0.0005311175777217912, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9707764466281332, 'gae_lambda': 0.9135171226404023, 'clip_range': 0.386150194058497, 'ent_coef': 0.0028210607330431104, 'std_penalty_weight': 0.3002691121496924}. Best is trial 83 with value: 25.946252822875977.


Mean is: 21.71119499206543, Std is: 16.429189682006836



[I 2025-02-12 19:59:44,157] Trial 93 finished with value: 5.9829912185668945 and parameters: {'reset_noise_scale': 0.12180386703004188, 'forward_reward_weight': 1.1299810970633666, 'ctrl_cost_weight': 0.23104719421435443, 'healthy_reward': 1.431772114097082, 'contact_cost_weight': 0.0006760998528058616, 'healthy_z_lower': 0.10467151017860164, 'healthy_z_upper': 1.1372710147858702, 'contact_force_min': -0.5021964104510894, 'contact_force_max': 0.5970488018653213, 'learning_rate': 0.0006533057012325361, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9707562269132834, 'gae_lambda': 0.9197139982045307, 'clip_range': 0.4036051194775322, 'ent_coef': 0.018367166098018195, 'std_penalty_weight': 0.32128451298586397}. Best is trial 83 with value: 25.946252822875977.


Mean is: 9.830653190612793, Std is: 11.975871086120605



[I 2025-02-12 20:01:24,601] Trial 94 finished with value: 18.97806167602539 and parameters: {'reset_noise_scale': 0.13804203051546574, 'forward_reward_weight': 1.0847868151190534, 'ctrl_cost_weight': 0.3042532434210381, 'healthy_reward': 1.4929085251539076, 'contact_cost_weight': 0.0007028286146329299, 'healthy_z_lower': 0.11254007628783863, 'healthy_z_upper': 1.1785388509784693, 'contact_force_min': -0.5822458287195349, 'contact_force_max': 0.5669035148317885, 'learning_rate': 0.00098883349333734, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9698299437358024, 'gae_lambda': 0.9202809252687504, 'clip_range': 0.37289659483824644, 'ent_coef': 0.003074486762640474, 'std_penalty_weight': 0.28712119048774143}. Best is trial 83 with value: 25.946252822875977.


Mean is: 23.03473663330078, Std is: 14.12878704071045



[I 2025-02-12 20:03:03,265] Trial 95 finished with value: 10.802331924438477 and parameters: {'reset_noise_scale': 0.13702925279049105, 'forward_reward_weight': 1.235999844665662, 'ctrl_cost_weight': 0.39038441677677294, 'healthy_reward': 1.4975754123362834, 'contact_cost_weight': 0.0006991387472878986, 'healthy_z_lower': 0.11281390792358167, 'healthy_z_upper': 1.1432834505166227, 'contact_force_min': -0.580327366821872, 'contact_force_max': 0.6056608709504882, 'learning_rate': 0.0009907138444605755, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.9690385387194077, 'gae_lambda': 0.921606019436791, 'clip_range': 0.3882206480574056, 'ent_coef': 0.003597493042516091, 'std_penalty_weight': 0.3336689808881012}. Best is trial 83 with value: 25.946252822875977.


Mean is: 14.204071998596191, Std is: 10.194953918457031



[I 2025-02-12 20:04:40,150] Trial 96 finished with value: 11.012130737304688 and parameters: {'reset_noise_scale': 0.14343715235546184, 'forward_reward_weight': 1.124966629450731, 'ctrl_cost_weight': 0.3329135638340779, 'healthy_reward': 1.3884932418797111, 'contact_cost_weight': 0.0006450288437626493, 'healthy_z_lower': 0.10352534195871932, 'healthy_z_upper': 1.155569238379549, 'contact_force_min': -0.5355625453300346, 'contact_force_max': 0.5116041707726134, 'learning_rate': 0.0007668131528581714, 'n_steps': 8192, 'batch_size': 512, 'gamma': 0.96993216975838, 'gae_lambda': 0.9039822244063072, 'clip_range': 0.41995445452376845, 'ent_coef': 0.006753935219732872, 'std_penalty_weight': 0.28803821846381567}. Best is trial 83 with value: 25.946252822875977.


Mean is: 14.39720344543457, Std is: 11.752166748046875



[I 2025-02-12 20:06:25,372] Trial 97 finished with value: 18.609283447265625 and parameters: {'reset_noise_scale': 0.1287790916752619, 'forward_reward_weight': 1.0807948207937113, 'ctrl_cost_weight': 0.30142086441568405, 'healthy_reward': 1.3494803865187075, 'contact_cost_weight': 0.00019233716127893557, 'healthy_z_lower': 0.11209911476944152, 'healthy_z_upper': 1.1285690432426865, 'contact_force_min': -0.5618170393117885, 'contact_force_max': 0.5825628507496203, 'learning_rate': 0.0009734499097481003, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9704235441002391, 'gae_lambda': 0.9072014509848181, 'clip_range': 0.3743603853684117, 'ent_coef': 0.0034231877951773734, 'std_penalty_weight': 0.2782847104796179}. Best is trial 83 with value: 25.946252822875977.


Mean is: 21.76706886291504, Std is: 11.347318649291992



[I 2025-02-12 20:08:04,030] Trial 98 finished with value: 14.059103012084961 and parameters: {'reset_noise_scale': 0.12882704495207797, 'forward_reward_weight': 1.0820424366643948, 'ctrl_cost_weight': 0.3045354714797962, 'healthy_reward': 1.3343110342331672, 'contact_cost_weight': 0.0007241230200077763, 'healthy_z_lower': 0.11656381369715857, 'healthy_z_upper': 1.1269634935962092, 'contact_force_min': -0.5567102221012313, 'contact_force_max': 0.5855223702281677, 'learning_rate': 0.0008955649721013961, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9704677946248161, 'gae_lambda': 0.9249918413717498, 'clip_range': 0.37357117275606744, 'ent_coef': 0.004105523580915516, 'std_penalty_weight': 0.2748196585228853}. Best is trial 83 with value: 25.946252822875977.


Mean is: 17.342214584350586, Std is: 11.94642162322998



[I 2025-02-12 20:09:38,097] Trial 99 finished with value: 11.116708755493164 and parameters: {'reset_noise_scale': 0.13812516029406519, 'forward_reward_weight': 1.1909457022622005, 'ctrl_cost_weight': 0.2355597223556882, 'healthy_reward': 1.2709951345423887, 'contact_cost_weight': 0.0003664404857708982, 'healthy_z_lower': 0.11275217318309962, 'healthy_z_upper': 1.111537814567343, 'contact_force_min': -0.5897144577457931, 'contact_force_max': 0.611945690565202, 'learning_rate': 0.0005752878050771929, 'n_steps': 8192, 'batch_size': 4096, 'gamma': 0.9697426706006769, 'gae_lambda': 0.9056585949909872, 'clip_range': 0.39625726184979654, 'ent_coef': 0.001902068746484831, 'std_penalty_weight': 0.3588038142073344}. Best is trial 83 with value: 25.946252822875977.


Mean is: 15.564428329467773, Std is: 12.395966529846191



[I 2025-02-12 20:11:26,226] Trial 100 finished with value: 26.830921173095703 and parameters: {'reset_noise_scale': 0.1224648700491494, 'forward_reward_weight': 1.0798217517026751, 'ctrl_cost_weight': 0.2788960190947023, 'healthy_reward': 1.4972086156641724, 'contact_cost_weight': 0.00019495257535118138, 'healthy_z_lower': 0.10525289571959973, 'healthy_z_upper': 1.1803240798353063, 'contact_force_min': -0.5187992701613672, 'contact_force_max': 0.5870857431066443, 'learning_rate': 0.000983439712869658, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9686135698396399, 'gae_lambda': 0.9145395422692033, 'clip_range': 0.37757085535729756, 'ent_coef': 0.00017055556769922042, 'std_penalty_weight': 0.28813167612676016}. Best is trial 100 with value: 26.830921173095703.


Mean is: 30.054834365844727, Std is: 11.189023971557617



[I 2025-02-12 20:13:07,464] Trial 101 finished with value: 25.19329071044922 and parameters: {'reset_noise_scale': 0.12099159337852684, 'forward_reward_weight': 1.0738991621491558, 'ctrl_cost_weight': 0.2845788398941709, 'healthy_reward': 1.4933080971914108, 'contact_cost_weight': 0.00019097603318072117, 'healthy_z_lower': 0.10528990899548793, 'healthy_z_upper': 1.1768552234908454, 'contact_force_min': -0.5111124868716399, 'contact_force_max': 0.6313433811360442, 'learning_rate': 0.0009223463040606536, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9684755657510167, 'gae_lambda': 0.9099820607755159, 'clip_range': 0.37598945006805706, 'ent_coef': 0.004655176475653071, 'std_penalty_weight': 0.2846395558203833}. Best is trial 100 with value: 26.830921173095703.


Mean is: 28.28472900390625, Std is: 10.860884666442871



[I 2025-02-12 20:14:46,639] Trial 102 finished with value: 22.10065460205078 and parameters: {'reset_noise_scale': 0.12200017876548269, 'forward_reward_weight': 1.0684618413244094, 'ctrl_cost_weight': 0.28595911569004107, 'healthy_reward': 1.3628383252242746, 'contact_cost_weight': 0.00011597952320176016, 'healthy_z_lower': 0.10696983352920704, 'healthy_z_upper': 1.1798101528532434, 'contact_force_min': -0.5188573785345967, 'contact_force_max': 0.5889307945331378, 'learning_rate': 0.0004918993447089924, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9685198503475811, 'gae_lambda': 0.9096384708680562, 'clip_range': 0.36935531093716023, 'ent_coef': 0.00011521536155912457, 'std_penalty_weight': 0.2631485752256217}. Best is trial 100 with value: 26.830921173095703.


Mean is: 25.19683265686035, Std is: 11.765890121459961



[I 2025-02-12 20:16:23,856] Trial 103 finished with value: -0.29791396856307983 and parameters: {'reset_noise_scale': 0.12218532979019461, 'forward_reward_weight': 1.070255246046447, 'ctrl_cost_weight': 0.34013714169982645, 'healthy_reward': 0.5128785900770816, 'contact_cost_weight': 0.0001500489684814228, 'healthy_z_lower': 0.10998667274697327, 'healthy_z_upper': 1.176293483264662, 'contact_force_min': -0.5138270034763344, 'contact_force_max': 0.6247148116694641, 'learning_rate': 0.0005087111566081206, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9684664686277951, 'gae_lambda': 0.914618754901164, 'clip_range': 0.3769965927690908, 'ent_coef': 0.00012555310537752776, 'std_penalty_weight': 0.26307254113779716}. Best is trial 100 with value: 26.830921173095703.


Mean is: 0.06020425260066986, Std is: 1.361290693283081



[I 2025-02-12 20:17:54,276] Trial 104 finished with value: 17.350875854492188 and parameters: {'reset_noise_scale': 0.126830014383405, 'forward_reward_weight': 1.0406490773225103, 'ctrl_cost_weight': 0.28265753251985587, 'healthy_reward': 1.3581872137286424, 'contact_cost_weight': 0.0001781544897024386, 'healthy_z_lower': 0.1073310645112776, 'healthy_z_upper': 1.1747397331754241, 'contact_force_min': -0.567559736182478, 'contact_force_max': 0.6333936597309873, 'learning_rate': 0.0008619827671243383, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9681772016297896, 'gae_lambda': 0.9002953203466977, 'clip_range': 0.36942943411606527, 'ent_coef': 0.005133598713560114, 'std_penalty_weight': 0.2487934811673645}. Best is trial 100 with value: 26.830921173095703.


Mean is: 20.773950576782227, Std is: 13.758699417114258



[I 2025-02-12 20:19:16,481] Trial 105 finished with value: 9.239829063415527 and parameters: {'reset_noise_scale': 0.12058828347785126, 'forward_reward_weight': 1.0261488859405676, 'ctrl_cost_weight': 0.3147931193658321, 'healthy_reward': 1.4459288409256854, 'contact_cost_weight': 0.00028720379561956267, 'healthy_z_lower': 0.11225025380769149, 'healthy_z_upper': 1.0018626401771713, 'contact_force_min': -0.5534055637020718, 'contact_force_max': 0.5875132951553458, 'learning_rate': 0.0005930531691872756, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9709068780085706, 'gae_lambda': 0.9104226486154499, 'clip_range': 0.3847495587172955, 'ent_coef': 0.002005122168056612, 'std_penalty_weight': 0.3196323341614089}. Best is trial 100 with value: 26.830921173095703.


Mean is: 12.945157051086426, Std is: 11.592469215393066



[I 2025-02-12 20:20:56,802] Trial 106 finished with value: 19.87623405456543 and parameters: {'reset_noise_scale': 0.1075185181661828, 'forward_reward_weight': 1.0791629583891142, 'ctrl_cost_weight': 0.3576264491171371, 'healthy_reward': 1.4814104104422623, 'contact_cost_weight': 0.00021127822995394843, 'healthy_z_lower': 0.12350810971994099, 'healthy_z_upper': 1.1586997096058573, 'contact_force_min': -0.5001946359635498, 'contact_force_max': 0.6407469161467325, 'learning_rate': 0.0007155908924582377, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9716637155725593, 'gae_lambda': 0.9184508538598863, 'clip_range': 0.37489620544250235, 'ent_coef': 0.006383281030401448, 'std_penalty_weight': 0.24252998972734865}. Best is trial 100 with value: 26.830921173095703.


Mean is: 22.29336929321289, Std is: 9.966338157653809



[I 2025-02-12 20:22:29,038] Trial 107 finished with value: 12.397492408752441 and parameters: {'reset_noise_scale': 0.10849284904711749, 'forward_reward_weight': 1.1526308264366552, 'ctrl_cost_weight': 0.38083571881022754, 'healthy_reward': 1.3821828975508403, 'contact_cost_weight': 0.0002101217241336929, 'healthy_z_lower': 0.1241109057755326, 'healthy_z_upper': 1.1796836287136414, 'contact_force_min': -0.5211715080471722, 'contact_force_max': 0.5910678998034341, 'learning_rate': 0.000708365376683877, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9717086657054249, 'gae_lambda': 0.9187393682330903, 'clip_range': 0.3748918822904761, 'ent_coef': 0.006443932814825687, 'std_penalty_weight': 0.30174106641725235}. Best is trial 100 with value: 26.830921173095703.


Mean is: 15.624873161315918, Std is: 10.695862770080566



[I 2025-02-12 20:24:06,538] Trial 108 finished with value: 12.346687316894531 and parameters: {'reset_noise_scale': 0.1353948387971538, 'forward_reward_weight': 1.1080234174399843, 'ctrl_cost_weight': 0.41084272451329884, 'healthy_reward': 1.3202824646883748, 'contact_cost_weight': 0.00010505135589313551, 'healthy_z_lower': 0.10547729313044428, 'healthy_z_upper': 1.1916317851023925, 'contact_force_min': -0.5030871730024633, 'contact_force_max': 0.6371701953618539, 'learning_rate': 0.00048778182122241175, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9723126647108826, 'gae_lambda': 0.914432640635158, 'clip_range': 0.379056315206712, 'ent_coef': 0.007621616596695048, 'std_penalty_weight': 0.28888169332904706}. Best is trial 100 with value: 26.830921173095703.


Mean is: 14.4563627243042, Std is: 7.302905082702637



[I 2025-02-12 20:25:42,953] Trial 109 finished with value: 22.054845809936523 and parameters: {'reset_noise_scale': 0.10553356890733714, 'forward_reward_weight': 1.0798866465587826, 'ctrl_cost_weight': 0.2758896592172877, 'healthy_reward': 1.476903160119161, 'contact_cost_weight': 0.0001365667986912362, 'healthy_z_lower': 0.13121407461861462, 'healthy_z_upper': 1.1594842321003216, 'contact_force_min': -0.524496902643235, 'contact_force_max': 0.6160936310179576, 'learning_rate': 0.0008549639166793176, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9704937147227531, 'gae_lambda': 0.9028657172106043, 'clip_range': 0.3662569298112519, 'ent_coef': 0.004033626453597726, 'std_penalty_weight': 0.26462032687411485}. Best is trial 100 with value: 26.830921173095703.


Mean is: 25.791486740112305, Std is: 14.120760917663574



[I 2025-02-12 20:27:16,936] Trial 110 finished with value: 18.85300064086914 and parameters: {'reset_noise_scale': 0.10393584496401775, 'forward_reward_weight': 1.052899324018762, 'ctrl_cost_weight': 0.25369007129129534, 'healthy_reward': 1.4842619770762269, 'contact_cost_weight': 0.0001536150164781899, 'healthy_z_lower': 0.12292182411403793, 'healthy_z_upper': 1.1608191497845433, 'contact_force_min': -0.5235182896053044, 'contact_force_max': 0.6048323518538861, 'learning_rate': 0.0008409506607624376, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9710543668697785, 'gae_lambda': 0.9029900860011953, 'clip_range': 0.36510248490635333, 'ent_coef': 0.01221361064045777, 'std_penalty_weight': 0.2657403211155709}. Best is trial 100 with value: 26.830921173095703.


Mean is: 22.934865951538086, Std is: 15.360357284545898



[I 2025-02-12 20:28:50,531] Trial 111 finished with value: 20.049116134643555 and parameters: {'reset_noise_scale': 0.10444125595709823, 'forward_reward_weight': 1.0640664498673353, 'ctrl_cost_weight': 0.2792011677919185, 'healthy_reward': 1.4822078983780091, 'contact_cost_weight': 0.0001333574015013631, 'healthy_z_lower': 0.1317979743082922, 'healthy_z_upper': 1.159126314759129, 'contact_force_min': -0.5241941266889105, 'contact_force_max': 0.6090329473507962, 'learning_rate': 0.0008666423092784536, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9711015173970304, 'gae_lambda': 0.9031159645711615, 'clip_range': 0.3651514070045034, 'ent_coef': 0.011741652762132905, 'std_penalty_weight': 0.2635719991011208}. Best is trial 100 with value: 26.830921173095703.


Mean is: 23.864967346191406, Std is: 14.477449417114258



[I 2025-02-12 20:30:29,662] Trial 112 finished with value: 21.736961364746094 and parameters: {'reset_noise_scale': 0.08909150090071855, 'forward_reward_weight': 1.0680868172509224, 'ctrl_cost_weight': 0.3528782382692614, 'healthy_reward': 1.4460045909529167, 'contact_cost_weight': 0.0001231357531931385, 'healthy_z_lower': 0.1315667736052744, 'healthy_z_upper': 1.1793813676102254, 'contact_force_min': -0.5450956165621619, 'contact_force_max': 0.6405712778712158, 'learning_rate': 0.0007641266324214186, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9726548543799701, 'gae_lambda': 0.9048359284356554, 'clip_range': 0.39121754253035695, 'ent_coef': 0.007771163640278357, 'std_penalty_weight': 0.24524974925044}. Best is trial 100 with value: 26.830921173095703.


Mean is: 24.06292724609375, Std is: 9.4840669631958



[I 2025-02-12 20:32:02,243] Trial 113 finished with value: 16.119609832763672 and parameters: {'reset_noise_scale': 0.0708050158016385, 'forward_reward_weight': 1.107067888209752, 'ctrl_cost_weight': 0.35393461511949165, 'healthy_reward': 1.4399629121355282, 'contact_cost_weight': 0.0001234730480977408, 'healthy_z_lower': 0.13117059754336727, 'healthy_z_upper': 1.1492763734052442, 'contact_force_min': -0.513055356634486, 'contact_force_max': 0.6602765028001583, 'learning_rate': 0.0007466172250435275, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9728161569150036, 'gae_lambda': 0.9042552499764055, 'clip_range': 0.3924717570720201, 'ent_coef': 0.00771154733553932, 'std_penalty_weight': 0.24651552603968208}. Best is trial 100 with value: 26.830921173095703.


Mean is: 18.788007736206055, Std is: 10.824460983276367



[I 2025-02-12 20:33:39,889] Trial 114 finished with value: 13.731244087219238 and parameters: {'reset_noise_scale': 0.08495707553107382, 'forward_reward_weight': 1.0617403362747913, 'ctrl_cost_weight': 0.46291389083448453, 'healthy_reward': 1.4048514411705866, 'contact_cost_weight': 0.00012133477788756761, 'healthy_z_lower': 0.13391714744427147, 'healthy_z_upper': 1.1905374323993034, 'contact_force_min': -0.5482000614755655, 'contact_force_max': 0.6443803235696983, 'learning_rate': 0.0006158427447410441, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.971334445326719, 'gae_lambda': 0.9103987201468615, 'clip_range': 0.3830672229787161, 'ent_coef': 0.011170132548125782, 'std_penalty_weight': 0.21127286609434745}. Best is trial 100 with value: 26.830921173095703.


Mean is: 15.255634307861328, Std is: 7.215267181396484



[I 2025-02-12 20:35:14,203] Trial 115 finished with value: 20.988683700561523 and parameters: {'reset_noise_scale': 0.09352092489079838, 'forward_reward_weight': 1.0265484269999676, 'ctrl_cost_weight': 0.2748354660244209, 'healthy_reward': 1.4441785176117052, 'contact_cost_weight': 0.00026873271494387075, 'healthy_z_lower': 0.1407877688833885, 'healthy_z_upper': 1.1591424019966763, 'contact_force_min': -0.527818117945322, 'contact_force_max': 0.6664334558429683, 'learning_rate': 0.0008906815652930549, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9717169218793575, 'gae_lambda': 0.9020810529681471, 'clip_range': 0.3685938139134932, 'ent_coef': 0.0052986095596661365, 'std_penalty_weight': 0.22491617183412022}. Best is trial 100 with value: 26.830921173095703.


Mean is: 24.0317440032959, Std is: 13.529753684997559



[I 2025-02-12 20:36:51,910] Trial 116 finished with value: 22.970115661621094 and parameters: {'reset_noise_scale': 0.09571038655723722, 'forward_reward_weight': 0.9777105694728014, 'ctrl_cost_weight': 0.271347124180608, 'healthy_reward': 1.4797852124439834, 'contact_cost_weight': 0.0002581184975463604, 'healthy_z_lower': 0.13820330031742342, 'healthy_z_upper': 1.1419994075040787, 'contact_force_min': -0.5259916910699916, 'contact_force_max': 0.6236311623745036, 'learning_rate': 0.0008892163360269861, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9734069681913005, 'gae_lambda': 0.9013804558946078, 'clip_range': 0.3692421633231275, 'ent_coef': 0.005540772701592486, 'std_penalty_weight': 0.2604521239336649}. Best is trial 100 with value: 26.830921173095703.


Mean is: 26.278215408325195, Std is: 12.701376914978027



[I 2025-02-12 20:38:30,514] Trial 117 finished with value: 23.235984802246094 and parameters: {'reset_noise_scale': 0.09223063876826079, 'forward_reward_weight': 1.0245500758964166, 'ctrl_cost_weight': 0.274026883169459, 'healthy_reward': 1.4397732634079201, 'contact_cost_weight': 0.00028314209700986125, 'healthy_z_lower': 0.13908937360344806, 'healthy_z_upper': 1.1411843014376308, 'contact_force_min': -0.5254004628781188, 'contact_force_max': 0.6624883512148196, 'learning_rate': 0.000864488246014209, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9730807810048974, 'gae_lambda': 0.9016068758799785, 'clip_range': 0.36857840222721033, 'ent_coef': 0.008972620851789589, 'std_penalty_weight': 0.22606813458180444}. Best is trial 100 with value: 26.830921173095703.


Mean is: 26.25431251525879, Std is: 13.351410865783691



[I 2025-02-12 20:40:12,291] Trial 118 finished with value: 17.200712203979492 and parameters: {'reset_noise_scale': 0.09323617670778007, 'forward_reward_weight': 0.9322084323529976, 'ctrl_cost_weight': 0.32517184470824756, 'healthy_reward': 1.4435049125363595, 'contact_cost_weight': 0.0002701915379805916, 'healthy_z_lower': 0.13869347421676337, 'healthy_z_upper': 1.1410311274862275, 'contact_force_min': -0.5135924663228876, 'contact_force_max': 0.6680486666854316, 'learning_rate': 0.000633027554529507, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.9738574685130927, 'gae_lambda': 0.9015458030643156, 'clip_range': 0.3698523369737974, 'ent_coef': 0.008935107034746145, 'std_penalty_weight': 0.22335033733321052}. Best is trial 100 with value: 26.830921173095703.


Mean is: 20.016828536987305, Std is: 12.60851764678955



[I 2025-02-12 20:42:00,789] Trial 119 finished with value: 5.083797454833984 and parameters: {'reset_noise_scale': 0.08968315248041114, 'forward_reward_weight': 1.0290207194227556, 'ctrl_cost_weight': 0.26822423368859066, 'healthy_reward': 0.618040584261314, 'contact_cost_weight': 0.00031801271399175423, 'healthy_z_lower': 0.14848270887377743, 'healthy_z_upper': 1.1352736992924466, 'contact_force_min': -0.5277517471486307, 'contact_force_max': 0.621142897037848, 'learning_rate': 0.0008958488472934255, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9732177478554613, 'gae_lambda': 0.9056742563157695, 'clip_range': 0.36802106604522733, 'ent_coef': 0.004938100630118236, 'std_penalty_weight': 0.20682238754982313}. Best is trial 100 with value: 26.830921173095703.


Mean is: 5.920944690704346, Std is: 4.047662258148193



[I 2025-02-12 20:43:21,511] Trial 120 finished with value: -1.9451431035995483 and parameters: {'reset_noise_scale': 0.07598538309949897, 'forward_reward_weight': 0.9776849004603633, 'ctrl_cost_weight': 0.24744503014968663, 'healthy_reward': 1.4261355578437724, 'contact_cost_weight': 0.00016842126497421277, 'healthy_z_lower': 0.14176127449141224, 'healthy_z_upper': 1.148224764985782, 'contact_force_min': -0.5433375519761628, 'contact_force_max': 0.6937633197062858, 'learning_rate': 0.000778473629624766, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9735354632615165, 'gae_lambda': 0.9007228960050113, 'clip_range': 0.3922711906886722, 'ent_coef': 0.04226194177111994, 'std_penalty_weight': 0.25806320256819876}. Best is trial 100 with value: 26.830921173095703.


Mean is: -1.0963857173919678, Std is: 3.2889516353607178



[I 2025-02-12 20:45:03,748] Trial 121 finished with value: 22.94108772277832 and parameters: {'reset_noise_scale': 0.10030422752480447, 'forward_reward_weight': 1.019380556881494, 'ctrl_cost_weight': 0.22646501695156473, 'healthy_reward': 1.4813717554873458, 'contact_cost_weight': 0.0002302530856735089, 'healthy_z_lower': 0.15686596579470036, 'healthy_z_upper': 1.1612556401863299, 'contact_force_min': -0.5292109219317284, 'contact_force_max': 0.6566727655314997, 'learning_rate': 0.0008979412928598881, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9719222021257352, 'gae_lambda': 0.9056611478547513, 'clip_range': 0.3638920845060372, 'ent_coef': 0.0080808573337079, 'std_penalty_weight': 0.2672481276417406}. Best is trial 100 with value: 26.830921173095703.


Mean is: 26.988290786743164, Std is: 15.143988609313965



[I 2025-02-12 20:46:45,479] Trial 122 finished with value: 22.242488861083984 and parameters: {'reset_noise_scale': 0.09973104880965467, 'forward_reward_weight': 0.9853954739544886, 'ctrl_cost_weight': 0.22512252999858184, 'healthy_reward': 1.4489210123946719, 'contact_cost_weight': 0.0004359505356156596, 'healthy_z_lower': 0.16674320777872587, 'healthy_z_upper': 1.1697202088456204, 'contact_force_min': -0.5126099538197273, 'contact_force_max': 0.6566266267000174, 'learning_rate': 0.0009098279091494509, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9725109913041737, 'gae_lambda': 0.9065270787802256, 'clip_range': 0.3616538706258247, 'ent_coef': 0.005107716639433811, 'std_penalty_weight': 0.23717352221872745}. Best is trial 100 with value: 26.830921173095703.


Mean is: 25.945463180541992, Std is: 15.612930297851562



[I 2025-02-12 20:48:32,313] Trial 123 finished with value: 23.291635513305664 and parameters: {'reset_noise_scale': 0.08871711099761788, 'forward_reward_weight': 0.9834305347938915, 'ctrl_cost_weight': 0.22773586048230113, 'healthy_reward': 1.4552230177422663, 'contact_cost_weight': 0.0004130355488707498, 'healthy_z_lower': 0.16105841042284666, 'healthy_z_upper': 1.169451937751144, 'contact_force_min': -0.5115488434913185, 'contact_force_max': 0.6515751935675205, 'learning_rate': 0.000684333065492628, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9744074146944417, 'gae_lambda': 0.9059892370516087, 'clip_range': 0.3509241765231461, 'ent_coef': 0.0014261466385217325, 'std_penalty_weight': 0.23778137665608418}. Best is trial 100 with value: 26.830921173095703.


Mean is: 27.029176712036133, Std is: 15.718392372131348



[I 2025-02-12 20:50:23,080] Trial 124 finished with value: 29.540721893310547 and parameters: {'reset_noise_scale': 0.09992457411893896, 'forward_reward_weight': 0.9375165139014502, 'ctrl_cost_weight': 0.2335425711705681, 'healthy_reward': 1.4555789615689085, 'contact_cost_weight': 0.00044799996214531517, 'healthy_z_lower': 0.1651107515941699, 'healthy_z_upper': 1.1785398092613655, 'contact_force_min': -0.5108373603949075, 'contact_force_max': 0.6559628587732265, 'learning_rate': 0.0007209069399409416, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9724390629151038, 'gae_lambda': 0.9061380774988709, 'clip_range': 0.3523669838738494, 'ent_coef': 0.001547276037473699, 'std_penalty_weight': 0.24361279419069615}. Best is trial 124 with value: 29.540721893310547.


Mean is: 32.245323181152344, Std is: 11.102046966552734



[I 2025-02-12 20:52:05,636] Trial 125 finished with value: 23.620986938476562 and parameters: {'reset_noise_scale': 0.08734557657419513, 'forward_reward_weight': 0.9706808723245142, 'ctrl_cost_weight': 0.22655465982415618, 'healthy_reward': 1.4834996708926278, 'contact_cost_weight': 0.00043069022282819975, 'healthy_z_lower': 0.16528455119534824, 'healthy_z_upper': 1.1715461806835195, 'contact_force_min': -0.508480440908941, 'contact_force_max': 0.6553252596639628, 'learning_rate': 0.000678964794824356, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.972548934076188, 'gae_lambda': 0.9051411744503572, 'clip_range': 0.3509373460985977, 'ent_coef': 0.00032020477211767723, 'std_penalty_weight': 0.23640237909371115}. Best is trial 124 with value: 29.540721893310547.


Mean is: 27.319473266601562, Std is: 15.644881248474121



[I 2025-02-12 20:53:49,238] Trial 126 finished with value: 24.314298629760742 and parameters: {'reset_noise_scale': 0.1001315729137184, 'forward_reward_weight': 0.9333737356092157, 'ctrl_cost_weight': 0.22804705564242103, 'healthy_reward': 1.4996176304126776, 'contact_cost_weight': 0.00039846630568925797, 'healthy_z_lower': 0.16479884913534365, 'healthy_z_upper': 1.171369117988636, 'contact_force_min': -0.5059944573877053, 'contact_force_max': 0.6574966208453211, 'learning_rate': 0.0006693504783765548, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.974152685624017, 'gae_lambda': 0.9109630425316041, 'clip_range': 0.353769827498985, 'ent_coef': 0.0001427825493573592, 'std_penalty_weight': 0.23600661502790077}. Best is trial 124 with value: 29.540721893310547.


Mean is: 28.023677825927734, Std is: 15.717267990112305



[I 2025-02-12 20:55:30,221] Trial 127 finished with value: 25.336952209472656 and parameters: {'reset_noise_scale': 0.09902748915902099, 'forward_reward_weight': 0.873634104980452, 'ctrl_cost_weight': 0.21947514513701458, 'healthy_reward': 1.4957639180736968, 'contact_cost_weight': 0.00042011958902367927, 'healthy_z_lower': 0.16662600316389276, 'healthy_z_upper': 1.1697613382638574, 'contact_force_min': -0.5098720077069171, 'contact_force_max': 0.6723776648281379, 'learning_rate': 0.0006798882692093842, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.974680234463293, 'gae_lambda': 0.9116826961320442, 'clip_range': 0.35036300134430803, 'ent_coef': 0.0007031940329803556, 'std_penalty_weight': 0.23337063854515058}. Best is trial 124 with value: 29.540721893310547.


Mean is: 28.886966705322266, Std is: 15.211919784545898



[I 2025-02-12 20:57:17,960] Trial 128 finished with value: 29.6131534576416 and parameters: {'reset_noise_scale': 0.1005490588178269, 'forward_reward_weight': 0.8643633240841365, 'ctrl_cost_weight': 0.2374992864208405, 'healthy_reward': 1.498695350856152, 'contact_cost_weight': 0.00040836139416050105, 'healthy_z_lower': 0.16719565504480935, 'healthy_z_upper': 1.1710582234585567, 'contact_force_min': -0.5077031040561613, 'contact_force_max': 0.6563251374718874, 'learning_rate': 0.0005664328321982201, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9748247242558172, 'gae_lambda': 0.9118417562293947, 'clip_range': 0.3514321330537669, 'ent_coef': 3.491544323143414e-06, 'std_penalty_weight': 0.23463388959902187}. Best is trial 128 with value: 29.6131534576416.


Mean is: 32.67757797241211, Std is: 13.060450553894043



[I 2025-02-12 20:58:57,643] Trial 129 finished with value: 22.87775421142578 and parameters: {'reset_noise_scale': 0.09972926130383179, 'forward_reward_weight': 0.868700175510322, 'ctrl_cost_weight': 0.22838472660242498, 'healthy_reward': 1.4605721015691697, 'contact_cost_weight': 0.00042240048163988284, 'healthy_z_lower': 0.164013245129911, 'healthy_z_upper': 1.171552828216344, 'contact_force_min': -0.5083941829927694, 'contact_force_max': 0.651694402210729, 'learning_rate': 0.0005935979127808467, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9743361008959174, 'gae_lambda': 0.9068873884055934, 'clip_range': 0.34941201743416106, 'ent_coef': 0.0013028409896101252, 'std_penalty_weight': 0.23017167653253554}. Best is trial 128 with value: 29.6131534576416.


Mean is: 26.686939239501953, Std is: 16.54931640625



[I 2025-02-12 21:00:40,912] Trial 130 finished with value: 26.045495986938477 and parameters: {'reset_noise_scale': 0.09633879200191937, 'forward_reward_weight': 0.8756002732438325, 'ctrl_cost_weight': 0.24235459994511113, 'healthy_reward': 1.4970152240321049, 'contact_cost_weight': 0.0004463844033136811, 'healthy_z_lower': 0.17683808251027, 'healthy_z_upper': 1.1705898738125404, 'contact_force_min': -0.5073881523811504, 'contact_force_max': 0.679075839637156, 'learning_rate': 0.0004536822520988131, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9750187144813434, 'gae_lambda': 0.9153848521010989, 'clip_range': 0.35194363432161224, 'ent_coef': 0.001542343179386775, 'std_penalty_weight': 0.23309959242607203}. Best is trial 128 with value: 29.6131534576416.


Mean is: 29.76703453063965, Std is: 15.965447425842285



[I 2025-02-12 21:02:27,566] Trial 131 finished with value: 29.284393310546875 and parameters: {'reset_noise_scale': 0.09669615399172854, 'forward_reward_weight': 0.8790382480143454, 'ctrl_cost_weight': 0.24146850380561327, 'healthy_reward': 1.494699778610551, 'contact_cost_weight': 0.00041812788736494825, 'healthy_z_lower': 0.1776711440068986, 'healthy_z_upper': 1.1714608189077842, 'contact_force_min': -0.5069471750153153, 'contact_force_max': 0.6729109462498948, 'learning_rate': 0.0005696323944270359, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9748335952107756, 'gae_lambda': 0.9154706582026972, 'clip_range': 0.3499834857077236, 'ent_coef': 0.0007893412533126057, 'std_penalty_weight': 0.23764248708445515}. Best is trial 128 with value: 29.6131534576416.


Mean is: 32.38471603393555, Std is: 13.046160697937012



[I 2025-02-12 21:04:13,222] Trial 132 finished with value: 26.40524673461914 and parameters: {'reset_noise_scale': 0.09655100481687806, 'forward_reward_weight': 0.8835692084634788, 'ctrl_cost_weight': 0.24613283087907442, 'healthy_reward': 1.4998322612198105, 'contact_cost_weight': 0.00041552382067576037, 'healthy_z_lower': 0.17580464920362424, 'healthy_z_upper': 1.1685715495963205, 'contact_force_min': -0.5000377608597025, 'contact_force_max': 0.6759689108333173, 'learning_rate': 0.0006588215656172072, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9756855940791587, 'gae_lambda': 0.9151354613093182, 'clip_range': 0.34926571167156045, 'ent_coef': 0.000320043226245267, 'std_penalty_weight': 0.23765175236464275}. Best is trial 128 with value: 29.6131534576416.


Mean is: 29.77018928527832, Std is: 14.159134864807129



[I 2025-02-12 21:06:03,659] Trial 133 finished with value: 25.12618637084961 and parameters: {'reset_noise_scale': 0.09637244438103694, 'forward_reward_weight': 0.826602951207793, 'ctrl_cost_weight': 0.2412723979761825, 'healthy_reward': 1.4978718460495906, 'contact_cost_weight': 0.0004070780751192921, 'healthy_z_lower': 0.17548441826508776, 'healthy_z_upper': 1.1712136695696076, 'contact_force_min': -0.5002192502969772, 'contact_force_max': 0.676280799030313, 'learning_rate': 0.0006646719167652128, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.975559764088973, 'gae_lambda': 0.9149161046416602, 'clip_range': 0.3511088696196294, 'ent_coef': 0.0008636917420239133, 'std_penalty_weight': 0.24188600006732303}. Best is trial 128 with value: 29.6131534576416.


Mean is: 28.68882179260254, Std is: 14.728572845458984



[I 2025-02-12 21:07:48,762] Trial 134 finished with value: 25.088634490966797 and parameters: {'reset_noise_scale': 0.08556300203984951, 'forward_reward_weight': 0.8460975116688709, 'ctrl_cost_weight': 0.24837590516384525, 'healthy_reward': 1.4932706267235365, 'contact_cost_weight': 0.0003996981620789924, 'healthy_z_lower': 0.17541052859257755, 'healthy_z_upper': 1.1723283199129282, 'contact_force_min': -0.5002878605700448, 'contact_force_max': 0.69860998048148, 'learning_rate': 0.00043855462357873755, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.975660424012995, 'gae_lambda': 0.9160914296589004, 'clip_range': 0.3514578306644314, 'ent_coef': 0.00013281110169857321, 'std_penalty_weight': 0.23913929459140043}. Best is trial 128 with value: 29.6131534576416.


Mean is: 28.69586181640625, Std is: 15.084208488464355



[I 2025-02-12 21:09:34,235] Trial 135 finished with value: 25.974966049194336 and parameters: {'reset_noise_scale': 0.08671171543247581, 'forward_reward_weight': 0.8238299872795835, 'ctrl_cost_weight': 0.18074274410223057, 'healthy_reward': 1.4995685943611865, 'contact_cost_weight': 0.00042710443398812705, 'healthy_z_lower': 0.18731015984870483, 'healthy_z_upper': 1.1748959193461965, 'contact_force_min': -0.5006971432147329, 'contact_force_max': 0.6753995890088792, 'learning_rate': 0.00043012527131021014, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9757560772230283, 'gae_lambda': 0.9153658107731975, 'clip_range': 0.35283809906612507, 'ent_coef': 0.00020358205307147398, 'std_penalty_weight': 0.23848287583963004}. Best is trial 128 with value: 29.6131534576416.


Mean is: 29.87774658203125, Std is: 16.365034103393555



[I 2025-02-12 21:10:54,879] Trial 136 finished with value: 4.465216159820557 and parameters: {'reset_noise_scale': 0.08433071492621996, 'forward_reward_weight': 0.8244626914861052, 'ctrl_cost_weight': 0.1830438576695097, 'healthy_reward': 1.4992628641260421, 'contact_cost_weight': 0.0004585626726281047, 'healthy_z_lower': 0.18695845846500803, 'healthy_z_upper': 0.862833818489188, 'contact_force_min': -0.50288715190856, 'contact_force_max': 0.6972813672024517, 'learning_rate': 0.0004280832715192827, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.975633052631415, 'gae_lambda': 0.9159271548051126, 'clip_range': 0.344097696848278, 'ent_coef': 2.8109486829526085e-05, 'std_penalty_weight': 0.21427910602255135}. Best is trial 128 with value: 29.6131534576416.


Mean is: 6.389036178588867, Std is: 8.978104591369629



[I 2025-02-12 21:12:38,421] Trial 137 finished with value: 22.707008361816406 and parameters: {'reset_noise_scale': 0.07738542169929596, 'forward_reward_weight': 0.8896478613671622, 'ctrl_cost_weight': 0.24757150260776659, 'healthy_reward': 1.4985653648489239, 'contact_cost_weight': 0.00039481161362791753, 'healthy_z_lower': 0.17568028715867312, 'healthy_z_upper': 1.1705683964989178, 'contact_force_min': -0.5029582601586844, 'contact_force_max': 0.6776449475339507, 'learning_rate': 0.0004601075107498497, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9760808840875617, 'gae_lambda': 0.9128348281501618, 'clip_range': 0.3552382491339223, 'ent_coef': 0.0016394652739470128, 'std_penalty_weight': 0.25089780860011446}. Best is trial 128 with value: 29.6131534576416.


Mean is: 26.723342895507812, Std is: 16.007848739624023



[I 2025-02-12 21:14:21,313] Trial 138 finished with value: 23.74716567993164 and parameters: {'reset_noise_scale': 0.08671389958021357, 'forward_reward_weight': 0.8498468770232643, 'ctrl_cost_weight': 0.17483110462908957, 'healthy_reward': 1.4985240709750918, 'contact_cost_weight': 0.00046202628986817697, 'healthy_z_lower': 0.1804944159232435, 'healthy_z_upper': 1.1800378390738426, 'contact_force_min': -0.5107912740662542, 'contact_force_max': 0.6866399512567558, 'learning_rate': 0.0003834436862590499, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9749910316428734, 'gae_lambda': 0.9146469632485701, 'clip_range': 0.3441194689023764, 'ent_coef': 0.0003306282917124525, 'std_penalty_weight': 0.20179346288116085}. Best is trial 128 with value: 29.6131534576416.


Mean is: 27.420635223388672, Std is: 18.2041015625



[I 2025-02-12 21:16:07,770] Trial 139 finished with value: 30.35907554626465 and parameters: {'reset_noise_scale': 0.09678954730173456, 'forward_reward_weight': 0.8493868892360449, 'ctrl_cost_weight': 0.1714627411017044, 'healthy_reward': 1.4990105749506923, 'contact_cost_weight': 0.0004929816019125261, 'healthy_z_lower': 0.18266855666573611, 'healthy_z_upper': 1.1805537329563331, 'contact_force_min': -0.5015255562876796, 'contact_force_max': 0.6728930472479921, 'learning_rate': 0.00040716707792258077, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9749937337135025, 'gae_lambda': 0.9175536199743678, 'clip_range': 0.33188574983514935, 'ent_coef': 1.7122414684532647e-07, 'std_penalty_weight': 0.20018367493293268}. Best is trial 139 with value: 30.35907554626465.


Mean is: 33.7688102722168, Std is: 17.03303337097168



[I 2025-02-12 21:17:52,416] Trial 140 finished with value: 26.010597229003906 and parameters: {'reset_noise_scale': 0.09779877609008776, 'forward_reward_weight': 0.8941231112545437, 'ctrl_cost_weight': 0.2500391323640098, 'healthy_reward': 1.4659948124897246, 'contact_cost_weight': 0.0004896022492245013, 'healthy_z_lower': 0.18346015750581018, 'healthy_z_upper': 1.1943338897391245, 'contact_force_min': -0.5026471438764417, 'contact_force_max': 0.701705688182499, 'learning_rate': 0.0004147849921133224, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9765306245440268, 'gae_lambda': 0.9161294286698769, 'clip_range': 0.3317068719899328, 'ent_coef': 0.0021523981913889904, 'std_penalty_weight': 0.2006853825381597}. Best is trial 139 with value: 30.35907554626465.


Mean is: 28.96199607849121, Std is: 14.706592559814453



[I 2025-02-12 21:19:38,093] Trial 141 finished with value: 25.471609115600586 and parameters: {'reset_noise_scale': 0.09725083076232836, 'forward_reward_weight': 0.8432843188484258, 'ctrl_cost_weight': 0.25186497273139563, 'healthy_reward': 1.4681766013438728, 'contact_cost_weight': 0.0004910181858119032, 'healthy_z_lower': 0.19385951426209522, 'healthy_z_upper': 1.1949045515046086, 'contact_force_min': -0.5007533928362474, 'contact_force_max': 0.7058690805639778, 'learning_rate': 0.00040005504470551725, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9764418421534294, 'gae_lambda': 0.9170905221779478, 'clip_range': 0.347395889328967, 'ent_coef': 0.0020275808069050304, 'std_penalty_weight': 0.19922843486970043}. Best is trial 139 with value: 30.35907554626465.


Mean is: 28.52962875366211, Std is: 15.349310874938965



[I 2025-02-12 21:21:24,001] Trial 142 finished with value: 25.24599838256836 and parameters: {'reset_noise_scale': 0.0959303063850284, 'forward_reward_weight': 0.7944463043772099, 'ctrl_cost_weight': 0.2531996674281288, 'healthy_reward': 1.4663689028232634, 'contact_cost_weight': 0.0005007914916182587, 'healthy_z_lower': 0.19522751819341652, 'healthy_z_upper': 1.1942057015612482, 'contact_force_min': -0.5002222897391169, 'contact_force_max': 0.7251144728716264, 'learning_rate': 0.000335488337662511, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9769574861511767, 'gae_lambda': 0.9171204441458598, 'clip_range': 0.3471213584738621, 'ent_coef': 0.0021678759767597105, 'std_penalty_weight': 0.18055726219706653}. Best is trial 139 with value: 30.35907554626465.


Mean is: 27.996849060058594, Std is: 15.235331535339355



[I 2025-02-12 21:23:08,615] Trial 143 finished with value: 23.166229248046875 and parameters: {'reset_noise_scale': 0.0971704885007205, 'forward_reward_weight': 0.8045753255304106, 'ctrl_cost_weight': 0.25677556085040404, 'healthy_reward': 1.4717356789046518, 'contact_cost_weight': 0.000489894686169221, 'healthy_z_lower': 0.19321761452981454, 'healthy_z_upper': 1.1995921439191464, 'contact_force_min': -0.5170335084296797, 'contact_force_max': 0.7265802484769377, 'learning_rate': 0.00041293920978866834, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9769445153838828, 'gae_lambda': 0.9176881078277107, 'clip_range': 0.33011044387980265, 'ent_coef': 0.0022891086464472813, 'std_penalty_weight': 0.17865278662111247}. Best is trial 139 with value: 30.35907554626465.


Mean is: 26.065935134887695, Std is: 16.2309627532959



[I 2025-02-12 21:24:43,966] Trial 144 finished with value: 10.152485847473145 and parameters: {'reset_noise_scale': 0.09584690584082516, 'forward_reward_weight': 0.771085368297228, 'ctrl_cost_weight': 0.1864755580317968, 'healthy_reward': 1.4592845176610003, 'contact_cost_weight': 0.0005164941199026321, 'healthy_z_lower': 0.18409541744716665, 'healthy_z_upper': 1.194714431719072, 'contact_force_min': -0.5168898406083315, 'contact_force_max': 0.6718856221173639, 'learning_rate': 0.0005414626723083165, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9765702328375424, 'gae_lambda': 0.916566443962839, 'clip_range': 0.34203837211459137, 'ent_coef': 0.03297082218127359, 'std_penalty_weight': 0.19555609500963517}. Best is trial 139 with value: 30.35907554626465.


Mean is: 12.739197731018066, Std is: 13.227466583251953



[I 2025-02-12 21:26:24,277] Trial 145 finished with value: 28.354154586791992 and parameters: {'reset_noise_scale': 0.1025852282429613, 'forward_reward_weight': 0.8893886861991078, 'ctrl_cost_weight': 0.1616927955619623, 'healthy_reward': 1.4720111230125408, 'contact_cost_weight': 0.0004898186131524656, 'healthy_z_lower': 0.19230227913668652, 'healthy_z_upper': 1.1826748241520129, 'contact_force_min': -0.5001333955014382, 'contact_force_max': 0.7124677225863324, 'learning_rate': 0.0003247006557404905, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.97472616064905, 'gae_lambda': 0.9288929360086499, 'clip_range': 0.3333428249474301, 'ent_coef': 0.002338702778132007, 'std_penalty_weight': 0.18107983574661762}. Best is trial 139 with value: 30.35907554626465.


Mean is: 31.653223037719727, Std is: 18.21886444091797



[I 2025-02-12 21:27:54,761] Trial 146 finished with value: 17.479482650756836 and parameters: {'reset_noise_scale': 0.10220438826743566, 'forward_reward_weight': 0.8832724597703523, 'ctrl_cost_weight': 0.15094377116071905, 'healthy_reward': 1.4665659766105503, 'contact_cost_weight': 0.0004955754008793175, 'healthy_z_lower': 0.2021035149435204, 'healthy_z_upper': 1.1827152435321424, 'contact_force_min': -0.5155327206801814, 'contact_force_max': 0.703162619703112, 'learning_rate': 0.0003166448173657786, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.9775502165782609, 'gae_lambda': 0.9328146492496687, 'clip_range': 0.3333170683084298, 'ent_coef': 0.004960209054732947, 'std_penalty_weight': 0.20496191196245575}. Best is trial 139 with value: 30.35907554626465.


Mean is: 20.988508224487305, Std is: 17.120378494262695



[I 2025-02-12 21:29:30,320] Trial 147 finished with value: 24.461490631103516 and parameters: {'reset_noise_scale': 0.0910887638568198, 'forward_reward_weight': 0.8624492029949606, 'ctrl_cost_weight': 0.16687352261925203, 'healthy_reward': 1.4208660363645274, 'contact_cost_weight': 0.0004444031818639339, 'healthy_z_lower': 0.2096116405793928, 'healthy_z_upper': 1.189169250961561, 'contact_force_min': -0.5172569173216889, 'contact_force_max': 0.7172225064154304, 'learning_rate': 0.00040300078917525226, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9747443790890468, 'gae_lambda': 0.9127415552082535, 'clip_range': 0.34744437747062556, 'ent_coef': 0.006558928107047279, 'std_penalty_weight': 0.1782338330448821}. Best is trial 139 with value: 30.35907554626465.


Mean is: 27.637502670288086, Std is: 17.81935691833496



[I 2025-02-12 21:31:12,492] Trial 148 finished with value: 30.901912689208984 and parameters: {'reset_noise_scale': 0.0811219889284557, 'forward_reward_weight': 0.794019967338759, 'ctrl_cost_weight': 0.1909084649203593, 'healthy_reward': 1.4695470159426132, 'contact_cost_weight': 0.00048075670076003045, 'healthy_z_lower': 0.19353492629665098, 'healthy_z_upper': 1.1936905952567158, 'contact_force_min': -0.5349939620294489, 'contact_force_max': 0.7307512698224117, 'learning_rate': 0.0003564760563058714, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9762294172462653, 'gae_lambda': 0.9261117656360015, 'clip_range': 0.3320669028429513, 'ent_coef': 0.0026780011357637598, 'std_penalty_weight': 0.20050838533111062}. Best is trial 148 with value: 30.901912689208984.


Mean is: 34.09079360961914, Std is: 15.903982162475586



[I 2025-02-12 21:32:53,393] Trial 149 finished with value: 3.1935036182403564 and parameters: {'reset_noise_scale': 0.08197988968121245, 'forward_reward_weight': 0.7902073245638552, 'ctrl_cost_weight': 0.7504935490491764, 'healthy_reward': 1.4621091173313767, 'contact_cost_weight': 0.0004817004753845244, 'healthy_z_lower': 0.19552207176784125, 'healthy_z_upper': 1.1912094500597101, 'contact_force_min': -0.5334212752930569, 'contact_force_max': 0.7391074097968007, 'learning_rate': 0.000329522414643619, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.976116996897632, 'gae_lambda': 0.9246560448100333, 'clip_range': 0.3158251945244912, 'ent_coef': 0.002606991223035261, 'std_penalty_weight': 0.1418402026636369}. Best is trial 148 with value: 30.901912689208984.


Mean is: 3.428070545196533, Std is: 1.6537402868270874



[I 2025-02-12 21:34:37,048] Trial 150 finished with value: 27.01203155517578 and parameters: {'reset_noise_scale': 0.09357068476286888, 'forward_reward_weight': 0.8963626681776112, 'ctrl_cost_weight': 0.1790003006868123, 'healthy_reward': 1.4241354088087679, 'contact_cost_weight': 0.0005125825888972612, 'healthy_z_lower': 0.19101269728650955, 'healthy_z_upper': 1.1996912459715918, 'contact_force_min': -0.537360928011209, 'contact_force_max': 0.708722247665219, 'learning_rate': 0.0003652207685599865, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9751562451157769, 'gae_lambda': 0.9305841620298491, 'clip_range': 0.3289334097372034, 'ent_coef': 0.002396080529159213, 'std_penalty_weight': 0.18388054414289104}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.24475860595703, Std is: 17.580583572387695



[I 2025-02-12 21:36:22,801] Trial 151 finished with value: 29.112361907958984 and parameters: {'reset_noise_scale': 0.09512168511815386, 'forward_reward_weight': 0.8916961432963991, 'ctrl_cost_weight': 0.18750151187714612, 'healthy_reward': 1.4179931770535756, 'contact_cost_weight': 0.0005121375833300455, 'healthy_z_lower': 0.1915709716091049, 'healthy_z_upper': 1.194043642090034, 'contact_force_min': -0.5003801496743105, 'contact_force_max': 0.7090979494221813, 'learning_rate': 0.000352521456070281, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9752264093126575, 'gae_lambda': 0.9341827269676528, 'clip_range': 0.33123809753021316, 'ent_coef': 0.0024556575498564815, 'std_penalty_weight': 0.18459569886803065}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.06852340698242, Std is: 16.014245986938477



[I 2025-02-12 21:38:02,982] Trial 152 finished with value: 21.624975204467773 and parameters: {'reset_noise_scale': 0.09154689758064931, 'forward_reward_weight': 0.8991548693345224, 'ctrl_cost_weight': 0.1866191945962012, 'healthy_reward': 1.4008345965218716, 'contact_cost_weight': 0.0005405955283352314, 'healthy_z_lower': 0.1820445042794149, 'healthy_z_upper': 1.1821169835074696, 'contact_force_min': -0.5342264281123156, 'contact_force_max': 0.7048057011465542, 'learning_rate': 0.000358866746970164, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9750059552422441, 'gae_lambda': 0.9307285898548754, 'clip_range': 0.3258126447241212, 'ent_coef': 0.0026424409212169597, 'std_penalty_weight': 0.21797535690851083}. Best is trial 148 with value: 30.901912689208984.


Mean is: 25.329797744750977, Std is: 16.996522903442383



[I 2025-02-12 21:39:44,449] Trial 153 finished with value: 25.7365665435791 and parameters: {'reset_noise_scale': 0.08154047141632423, 'forward_reward_weight': 0.863976055330667, 'ctrl_cost_weight': 0.16454089176350264, 'healthy_reward': 1.4278078049128626, 'contact_cost_weight': 0.00047396987079907367, 'healthy_z_lower': 0.18633340452905844, 'healthy_z_upper': 1.199936015194315, 'contact_force_min': -0.5449034909717151, 'contact_force_max': 0.7555511753544392, 'learning_rate': 0.0004813815521503906, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9752204362480633, 'gae_lambda': 0.9282318007729102, 'clip_range': 0.3273005156712463, 'ent_coef': 0.004192882696460918, 'std_penalty_weight': 0.2017116339814635}. Best is trial 148 with value: 30.901912689208984.


Mean is: 29.443073272705078, Std is: 18.37527847290039



[I 2025-02-12 21:41:25,095] Trial 154 finished with value: 25.400005340576172 and parameters: {'reset_noise_scale': 0.06888889162171717, 'forward_reward_weight': 0.8416413080946791, 'ctrl_cost_weight': 0.17179207356241077, 'healthy_reward': 1.4118970156747, 'contact_cost_weight': 0.00047192146765967464, 'healthy_z_lower': 0.18837921166467425, 'healthy_z_upper': 1.1991367154684922, 'contact_force_min': -0.5442162932626615, 'contact_force_max': 0.7300092452139917, 'learning_rate': 0.00027669842048289907, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9752880444890893, 'gae_lambda': 0.9279242325111712, 'clip_range': 0.3314574153786633, 'ent_coef': 0.006697430785246969, 'std_penalty_weight': 0.19638287271020072}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.983919143676758, Std is: 18.24962615966797



[I 2025-02-12 21:43:05,282] Trial 155 finished with value: 25.411779403686523 and parameters: {'reset_noise_scale': 0.0808327437308766, 'forward_reward_weight': 0.9048319884313701, 'ctrl_cost_weight': 0.1915696124954404, 'healthy_reward': 1.4253056846923102, 'contact_cost_weight': 0.00044822478674764007, 'healthy_z_lower': 0.19288875253651144, 'healthy_z_upper': 1.1999940846190478, 'contact_force_min': -0.5213373830868446, 'contact_force_max': 0.7598596692915052, 'learning_rate': 0.0004671853047236947, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9765132011060247, 'gae_lambda': 0.9354030225694642, 'clip_range': 0.31957469637572156, 'ent_coef': 0.004023960614555091, 'std_penalty_weight': 0.15967734533413291}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.210973739624023, Std is: 17.53031349182129



[I 2025-02-12 21:44:44,628] Trial 156 finished with value: 23.73897933959961 and parameters: {'reset_noise_scale': 0.07575282538092187, 'forward_reward_weight': 0.9283769207135776, 'ctrl_cost_weight': 0.16071638572533994, 'healthy_reward': 1.3869438612658977, 'contact_cost_weight': 0.0005223642258713485, 'healthy_z_lower': 0.19047635443614022, 'healthy_z_upper': 1.1857223840211226, 'contact_force_min': -0.756722222114373, 'contact_force_max': 0.7134720581866423, 'learning_rate': 0.0003874490485181211, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9759004090950227, 'gae_lambda': 0.9302811780959459, 'clip_range': 0.33331878867063086, 'ent_coef': 0.004255071522656537, 'std_penalty_weight': 0.1832881162433305}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.051321029663086, Std is: 18.0717716217041



[I 2025-02-12 21:46:18,366] Trial 157 finished with value: 20.1077938079834 and parameters: {'reset_noise_scale': 0.10289869509185967, 'forward_reward_weight': 0.86020011726487, 'ctrl_cost_weight': 0.147273738413345, 'healthy_reward': 1.4521423826297604, 'contact_cost_weight': 0.0003709680752678733, 'healthy_z_lower': 0.2172679685566301, 'healthy_z_upper': 1.1920510539366982, 'contact_force_min': -0.5346864911591404, 'contact_force_max': 0.7534154302574088, 'learning_rate': 0.0002992192730472824, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9753169927065224, 'gae_lambda': 0.9246787811536312, 'clip_range': 0.3273375063094677, 'ent_coef': 0.006919830387820794, 'std_penalty_weight': 0.20133074776430465}. Best is trial 148 with value: 30.901912689208984.


Mean is: 23.8297061920166, Std is: 18.486555099487305



[I 2025-02-12 21:47:51,033] Trial 158 finished with value: 20.363880157470703 and parameters: {'reset_noise_scale': 0.09407497724861892, 'forward_reward_weight': 0.8140263102547702, 'ctrl_cost_weight': 0.20788097552305942, 'healthy_reward': 1.439364609061846, 'contact_cost_weight': 0.0003428058377535862, 'healthy_z_lower': 0.2005325032640658, 'healthy_z_upper': 1.1831957066642216, 'contact_force_min': -0.5539677342022262, 'contact_force_max': 0.6861589590115222, 'learning_rate': 0.0005050921133072352, 'n_steps': 4096, 'batch_size': 2048, 'gamma': 0.9764715709351681, 'gae_lambda': 0.9297362700766376, 'clip_range': 0.3397043450362798, 'ent_coef': 0.0026644454034603516, 'std_penalty_weight': 0.21281576878570058}. Best is trial 148 with value: 30.901912689208984.


Mean is: 24.110061645507812, Std is: 17.602933883666992



[I 2025-02-12 21:49:28,448] Trial 159 finished with value: 22.1009578704834 and parameters: {'reset_noise_scale': 0.08196934592750675, 'forward_reward_weight': 0.7537760525507807, 'ctrl_cost_weight': 0.17959747531716908, 'healthy_reward': 1.4742977270490036, 'contact_cost_weight': 0.0005584028468023966, 'healthy_z_lower': 0.2051912836367075, 'healthy_z_upper': 1.1907746780217252, 'contact_force_min': -0.652883128855264, 'contact_force_max': 0.7120005957505042, 'learning_rate': 0.00035373972150111953, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9775820764548562, 'gae_lambda': 0.9394360739748827, 'clip_range': 0.315946086586943, 'ent_coef': 0.010263956359140593, 'std_penalty_weight': 0.18605033603817345}. Best is trial 148 with value: 30.901912689208984.


Mean is: 25.328218460083008, Std is: 17.34617042541504



[I 2025-02-12 21:51:08,304] Trial 160 finished with value: 16.78947639465332 and parameters: {'reset_noise_scale': 0.1055912624656079, 'forward_reward_weight': 0.8817695281199832, 'ctrl_cost_weight': 0.20898186224887724, 'healthy_reward': 1.4254966253088805, 'contact_cost_weight': 0.0004747941188529859, 'healthy_z_lower': 0.18429217601298276, 'healthy_z_upper': 1.1803254565255055, 'contact_force_min': -0.5227709330282976, 'contact_force_max': 0.7404711746868272, 'learning_rate': 0.0002437154830247909, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9752748316509312, 'gae_lambda': 0.9208280115505985, 'clip_range': 0.32249658569826084, 'ent_coef': 0.0050098118712023675, 'std_penalty_weight': 0.4786374912508957}. Best is trial 148 with value: 30.901912689208984.


Mean is: 24.673831939697266, Std is: 16.47249984741211



[I 2025-02-12 21:52:49,897] Trial 161 finished with value: 27.758583068847656 and parameters: {'reset_noise_scale': 0.0792627144747141, 'forward_reward_weight': 0.9002782513718146, 'ctrl_cost_weight': 0.1920316651609397, 'healthy_reward': 1.4314768063506411, 'contact_cost_weight': 0.0004465410606418291, 'healthy_z_lower': 0.19153277125996251, 'healthy_z_upper': 1.1989708480286454, 'contact_force_min': -0.5000323244693028, 'contact_force_max': 0.7983639667734311, 'learning_rate': 0.00041019270546213454, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9764150214621992, 'gae_lambda': 0.9280140838061157, 'clip_range': 0.3193374261722826, 'ent_coef': 0.003667194279658409, 'std_penalty_weight': 0.16017668828518572}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.39615821838379, Std is: 16.46665382385254



[I 2025-02-12 21:54:24,067] Trial 162 finished with value: 21.836685180664062 and parameters: {'reset_noise_scale': 0.0660219858611629, 'forward_reward_weight': 0.9009834795013622, 'ctrl_cost_weight': 0.16019740071959976, 'healthy_reward': 1.4601603813911885, 'contact_cost_weight': 0.0005208452159709975, 'healthy_z_lower': 0.17203056794710597, 'healthy_z_upper': 1.1933656071062837, 'contact_force_min': -0.5010712993679461, 'contact_force_max': 0.794879764816756, 'learning_rate': 0.0004091144242254023, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9759035213038862, 'gae_lambda': 0.9263369018816137, 'clip_range': 0.33072725225999283, 'ent_coef': 0.0019984184651593924, 'std_penalty_weight': 0.1403761960192488}. Best is trial 148 with value: 30.901912689208984.


Mean is: 24.31970977783203, Std is: 17.688352584838867



[I 2025-02-12 21:56:06,668] Trial 163 finished with value: 25.809083938598633 and parameters: {'reset_noise_scale': 0.09026758502152382, 'forward_reward_weight': 0.9425804477895157, 'ctrl_cost_weight': 0.1909774014473345, 'healthy_reward': 1.3982422709201838, 'contact_cost_weight': 0.00045581307655465385, 'healthy_z_lower': 0.19733959533255538, 'healthy_z_upper': 1.1846557811955545, 'contact_force_min': -0.5150089685743324, 'contact_force_max': 0.7736085885657485, 'learning_rate': 0.000454303431716534, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9763630118702238, 'gae_lambda': 0.9235562247788025, 'clip_range': 0.3256513621030277, 'ent_coef': 0.0035760148276725145, 'std_penalty_weight': 0.16922152755139866}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.740774154663086, Std is: 17.32457160949707



[I 2025-02-12 21:57:48,927] Trial 164 finished with value: 25.431217193603516 and parameters: {'reset_noise_scale': 0.08007026814550504, 'forward_reward_weight': 0.9481415990880769, 'ctrl_cost_weight': 0.14203157339472616, 'healthy_reward': 1.3833018314176218, 'contact_cost_weight': 0.0004517602363333352, 'healthy_z_lower': 0.1870015634040839, 'healthy_z_upper': 1.1846179007887685, 'contact_force_min': -0.5426936749004874, 'contact_force_max': 0.774964360140201, 'learning_rate': 0.0005528872955193949, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9750835714481451, 'gae_lambda': 0.9226643336934763, 'clip_range': 0.3237006341051246, 'ent_coef': 0.006524458899646512, 'std_penalty_weight': 0.1721066838593425}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.362512588500977, Std is: 17.031845092773438



[I 2025-02-12 21:59:26,252] Trial 165 finished with value: 21.440183639526367 and parameters: {'reset_noise_scale': 0.09117024925773143, 'forward_reward_weight': 0.9227158872581066, 'ctrl_cost_weight': 0.17280254042303683, 'healthy_reward': 1.4014528681231997, 'contact_cost_weight': 0.0005090075266126492, 'healthy_z_lower': 0.19952364891920557, 'healthy_z_upper': 1.17938093706833, 'contact_force_min': -0.5182307912634858, 'contact_force_max': 0.7935064496531534, 'learning_rate': 0.0004594033077403646, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9746161653797812, 'gae_lambda': 0.9334675281352841, 'clip_range': 0.3279723432741074, 'ent_coef': 0.004035483216258224, 'std_penalty_weight': 0.14763277270810338}. Best is trial 148 with value: 30.901912689208984.


Mean is: 24.090391159057617, Std is: 17.95134925842285



[I 2025-02-12 22:01:01,563] Trial 166 finished with value: 21.564823150634766 and parameters: {'reset_noise_scale': 0.08876427548428349, 'forward_reward_weight': 0.8915851954349463, 'ctrl_cost_weight': 0.19435801624445237, 'healthy_reward': 1.4260725844352373, 'contact_cost_weight': 0.0004682112749141053, 'healthy_z_lower': 0.18112559146563334, 'healthy_z_upper': 1.1644044817452817, 'contact_force_min': -0.5265616305526107, 'contact_force_max': 0.7608818525562244, 'learning_rate': 0.00048370929466465734, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9755412333639265, 'gae_lambda': 0.9269426672909947, 'clip_range': 0.33543800841952226, 'ent_coef': 0.008625317754065893, 'std_penalty_weight': 0.12427289253712362}. Best is trial 148 with value: 30.901912689208984.


Mean is: 23.782188415527344, Std is: 17.84270477294922



[I 2025-02-12 22:02:41,971] Trial 167 finished with value: 24.99590301513672 and parameters: {'reset_noise_scale': 0.0785497670105974, 'forward_reward_weight': 0.8649245368462524, 'ctrl_cost_weight': 0.2102398648999926, 'healthy_reward': 1.4454111614668832, 'contact_cost_weight': 0.000537569064211507, 'healthy_z_lower': 0.1789809524742112, 'healthy_z_upper': 1.1855239171153478, 'contact_force_min': -0.5122315584740857, 'contact_force_max': 0.690891355567663, 'learning_rate': 0.0005727205001174789, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.975816098795713, 'gae_lambda': 0.9228344215639569, 'clip_range': 0.30812486232350333, 'ent_coef': 0.004429676046630601, 'std_penalty_weight': 0.1655599364624598}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.78148078918457, Std is: 16.825199127197266



[I 2025-02-12 22:04:21,386] Trial 168 finished with value: 16.668787002563477 and parameters: {'reset_noise_scale': 0.07349197685976319, 'forward_reward_weight': 0.8315673611927107, 'ctrl_cost_weight': 0.19465266751913624, 'healthy_reward': 0.8647246226873978, 'contact_cost_weight': 0.0004368373977222074, 'healthy_z_lower': 0.19112316903748675, 'healthy_z_upper': 1.199625147203994, 'contact_force_min': -0.5335371366549772, 'contact_force_max': 0.807429332577626, 'learning_rate': 0.0004380449588353226, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.97743041294961, 'gae_lambda': 0.9297807737737289, 'clip_range': 0.32087727563751905, 'ent_coef': 0.00010327126021718585, 'std_penalty_weight': 0.1577877668564422}. Best is trial 148 with value: 30.901912689208984.


Mean is: 18.568321228027344, Std is: 12.03853988647461



[I 2025-02-12 22:06:03,141] Trial 169 finished with value: 24.93855094909668 and parameters: {'reset_noise_scale': 0.054224124615735086, 'forward_reward_weight': 0.9335937593034996, 'ctrl_cost_weight': 0.16721918558909923, 'healthy_reward': 1.4752414040034951, 'contact_cost_weight': 0.00048062335183313056, 'healthy_z_lower': 0.18546304684008155, 'healthy_z_upper': 1.1792810778837821, 'contact_force_min': -0.5728633619866578, 'contact_force_max': 0.8325879693438797, 'learning_rate': 0.00036815544236424045, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9761877021556367, 'gae_lambda': 0.919701924249325, 'clip_range': 0.3150542365630742, 'ent_coef': 0.0023780777613988074, 'std_penalty_weight': 0.18653752561585568}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.146888732910156, Std is: 17.19942283630371



[I 2025-02-12 22:07:44,257] Trial 170 finished with value: 24.902719497680664 and parameters: {'reset_noise_scale': 0.08373719402162849, 'forward_reward_weight': 0.9107050056386204, 'ctrl_cost_weight': 0.14092500130636393, 'healthy_reward': 1.4036479383129472, 'contact_cost_weight': 0.0003795340845934299, 'healthy_z_lower': 0.17267938122962267, 'healthy_z_upper': 1.1922408613985613, 'contact_force_min': -0.5178121486715236, 'contact_force_max': 0.784461985830499, 'learning_rate': 0.0005168181287961343, 'n_steps': 4096, 'batch_size': 512, 'gamma': 0.9738980910682451, 'gae_lambda': 0.9255381051920258, 'clip_range': 0.3423814785969644, 'ent_coef': 0.006324776001078101, 'std_penalty_weight': 0.21603825562876694}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.6180362701416, Std is: 17.197498321533203



[I 2025-02-12 22:09:28,676] Trial 171 finished with value: 29.700490951538086 and parameters: {'reset_noise_scale': 0.09688750431163977, 'forward_reward_weight': 0.8499553013863589, 'ctrl_cost_weight': 0.188785438030704, 'healthy_reward': 1.4762797012942857, 'contact_cost_weight': 0.0005015152369929405, 'healthy_z_lower': 0.1978090367830464, 'healthy_z_upper': 1.199963393495483, 'contact_force_min': -0.5004941896300573, 'contact_force_max': 0.6815044749919335, 'learning_rate': 0.00039204487051255405, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9763772283979777, 'gae_lambda': 0.9190826222173559, 'clip_range': 0.334588350058334, 'ent_coef': 0.0026868088877710916, 'std_penalty_weight': 0.1973038374284999}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.87306594848633, Std is: 16.07964515686035



[I 2025-02-12 22:11:13,913] Trial 172 finished with value: 29.090883255004883 and parameters: {'reset_noise_scale': 0.10210984930942855, 'forward_reward_weight': 0.883391667694555, 'ctrl_cost_weight': 0.18867495582599025, 'healthy_reward': 1.4496024582945313, 'contact_cost_weight': 0.00045701390780368827, 'healthy_z_lower': 0.19939095501592957, 'healthy_z_upper': 1.1996291524656857, 'contact_force_min': -0.5094368674810686, 'contact_force_max': 0.6795409448369556, 'learning_rate': 0.00046658917687154156, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9753437343792482, 'gae_lambda': 0.9314984675104556, 'clip_range': 0.3338513841281444, 'ent_coef': 0.0031648575308417344, 'std_penalty_weight': 0.17223769748937462}. Best is trial 148 with value: 30.901912689208984.


Mean is: 31.91086196899414, Std is: 16.372596740722656



[I 2025-02-12 22:12:52,569] Trial 173 finished with value: 24.19985580444336 and parameters: {'reset_noise_scale': 0.10179110193245705, 'forward_reward_weight': 0.8840923957972605, 'ctrl_cost_weight': 0.21672091176121663, 'healthy_reward': 1.4756894849973425, 'contact_cost_weight': 0.0004536995534967182, 'healthy_z_lower': 0.19789244301648845, 'healthy_z_upper': 1.1868613450625238, 'contact_force_min': -0.5063267837210537, 'contact_force_max': 0.6880864735145187, 'learning_rate': 0.0002961095127422524, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9754566390169032, 'gae_lambda': 0.9320728496116105, 'clip_range': 0.33312198239307245, 'ent_coef': 0.0023702939857339106, 'std_penalty_weight': 0.1713113302569054}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.165752410888672, Std is: 17.31290626525879



[I 2025-02-12 22:14:37,173] Trial 174 finished with value: 29.834787368774414 and parameters: {'reset_noise_scale': 0.09393282467457943, 'forward_reward_weight': 0.8084023380083641, 'ctrl_cost_weight': 0.1884949212526794, 'healthy_reward': 1.4499186611723942, 'contact_cost_weight': 0.00042270840623043085, 'healthy_z_lower': 0.19027793039346028, 'healthy_z_upper': 1.1802679631605835, 'contact_force_min': -0.5114959499800638, 'contact_force_max': 0.6833141669640127, 'learning_rate': 0.00042339316882655846, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9745083168671664, 'gae_lambda': 0.9361574041152779, 'clip_range': 0.3375721156314989, 'ent_coef': 0.00012399023317973034, 'std_penalty_weight': 0.1775322610085209}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.378665924072266, Std is: 14.329110145568848



[I 2025-02-12 22:16:18,206] Trial 175 finished with value: 26.555374145507812 and parameters: {'reset_noise_scale': 0.09373817552090345, 'forward_reward_weight': 0.8180279516661377, 'ctrl_cost_weight': 0.21295241387950548, 'healthy_reward': 1.4516538002122603, 'contact_cost_weight': 0.00043196081661127497, 'healthy_z_lower': 0.20327767976401198, 'healthy_z_upper': 1.176978376842992, 'contact_force_min': -0.5001008586415541, 'contact_force_max': 0.6790178367659406, 'learning_rate': 0.00038546704739022983, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9745697712185007, 'gae_lambda': 0.9396711550991971, 'clip_range': 0.33814149026214246, 'ent_coef': 0.0017687492649867671, 'std_penalty_weight': 0.18172549820309666}. Best is trial 148 with value: 30.901912689208984.


Mean is: 29.568593978881836, Std is: 16.58116340637207



[I 2025-02-12 22:17:56,621] Trial 176 finished with value: 23.995845794677734 and parameters: {'reset_noise_scale': 0.09363026138287672, 'forward_reward_weight': 0.8111349559782408, 'ctrl_cost_weight': 0.21429089555852088, 'healthy_reward': 1.4523924479570463, 'contact_cost_weight': 0.00035000450484705094, 'healthy_z_lower': 0.21174747333529825, 'healthy_z_upper': 1.1761969077146543, 'contact_force_min': -0.509809411481512, 'contact_force_max': 0.6816082460542167, 'learning_rate': 0.00035262545350136983, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9745062918201081, 'gae_lambda': 0.9365522311096396, 'clip_range': 0.3368948135846326, 'ent_coef': 4.784208312280484e-05, 'std_penalty_weight': 0.19094156236304174}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.236106872558594, Std is: 16.969913482666016



[I 2025-02-12 22:19:35,693] Trial 177 finished with value: 24.78913688659668 and parameters: {'reset_noise_scale': 0.0983645249292371, 'forward_reward_weight': 0.7756565251233003, 'ctrl_cost_weight': 0.2388291695821508, 'healthy_reward': 1.4514026652562289, 'contact_cost_weight': 0.00041781202495506594, 'healthy_z_lower': 0.20566830503655012, 'healthy_z_upper': 1.1629831315218366, 'contact_force_min': -0.5096697985842045, 'contact_force_max': 0.6983086361790581, 'learning_rate': 0.00039455191972456835, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9748811113843767, 'gae_lambda': 0.9391554681717291, 'clip_range': 0.3416465027278792, 'ent_coef': 0.0018259464650318285, 'std_penalty_weight': 0.15611601568303154}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.376445770263672, Std is: 16.5729923248291



[I 2025-02-12 22:21:12,668] Trial 178 finished with value: 24.400821685791016 and parameters: {'reset_noise_scale': 0.09449361103157483, 'forward_reward_weight': 0.8298233102530441, 'ctrl_cost_weight': 0.1801142607599753, 'healthy_reward': 1.4783949202068396, 'contact_cost_weight': 0.00038717853203118615, 'healthy_z_lower': 0.20172818063844425, 'healthy_z_upper': 1.191305939008419, 'contact_force_min': -0.5005461699420488, 'contact_force_max': 0.6788521563841882, 'learning_rate': 0.00042038676810099846, 'n_steps': 2048, 'batch_size': 4096, 'gamma': 0.9742791752094003, 'gae_lambda': 0.9416337582993327, 'clip_range': 0.33152808405900214, 'ent_coef': 0.005784010219589707, 'std_penalty_weight': 0.17776539746867148}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.437206268310547, Std is: 17.080848693847656



[I 2025-02-12 22:22:31,394] Trial 179 finished with value: 5.742779731750488 and parameters: {'reset_noise_scale': 0.10246829583649861, 'forward_reward_weight': 0.8506463686388427, 'ctrl_cost_weight': 0.23299881777839848, 'healthy_reward': 1.4808803641347246, 'contact_cost_weight': 0.00042828112223236234, 'healthy_z_lower': 0.19002137350789902, 'healthy_z_upper': 0.9156469348089509, 'contact_force_min': -0.5249098077544259, 'contact_force_max': 0.666590656297926, 'learning_rate': 0.00032252738655563594, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9748215964402311, 'gae_lambda': 0.918751294308092, 'clip_range': 0.33631742434629774, 'ent_coef': 9.993770701762766e-05, 'std_penalty_weight': 0.22350465806172734}. Best is trial 148 with value: 30.901912689208984.


Mean is: 8.124420166015625, Std is: 10.655887603759766



[I 2025-02-12 22:24:08,824] Trial 180 finished with value: 18.283191680908203 and parameters: {'reset_noise_scale': 0.0876155310346207, 'forward_reward_weight': 1.378264607852676, 'ctrl_cost_weight': 0.21587879004593977, 'healthy_reward': 1.499857703749952, 'contact_cost_weight': 0.0005013946794051938, 'healthy_z_lower': 0.2182634161338833, 'healthy_z_upper': 1.1754359151087637, 'contact_force_min': -0.5232665515330276, 'contact_force_max': 0.6959280117966207, 'learning_rate': 0.0002461408461341171, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9758355980748759, 'gae_lambda': 0.9358077132870706, 'clip_range': 0.34086718949985395, 'ent_coef': 0.00845870457069894, 'std_penalty_weight': 0.20990530637292046}. Best is trial 148 with value: 30.901912689208984.


Mean is: 21.440078735351562, Std is: 15.039572715759277



[I 2025-02-12 22:25:41,167] Trial 181 finished with value: 7.376613616943359 and parameters: {'reset_noise_scale': 0.0988100129498054, 'forward_reward_weight': 0.8722643614740082, 'ctrl_cost_weight': 0.5062559299437642, 'healthy_reward': 1.4417720745019784, 'contact_cost_weight': 0.00043886612115218344, 'healthy_z_lower': 0.1823855778661886, 'healthy_z_upper': 1.0638959630005513, 'contact_force_min': -0.5009551233880359, 'contact_force_max': 0.6715036546003709, 'learning_rate': 0.00037307221970138725, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9772583758984246, 'gae_lambda': 0.9314145415030561, 'clip_range': 0.3446120059786371, 'ent_coef': 0.0023859354443422037, 'std_penalty_weight': 0.1828858596946577}. Best is trial 148 with value: 30.901912689208984.


Mean is: 8.595149993896484, Std is: 6.662824630737305



[I 2025-02-12 22:27:21,801] Trial 182 finished with value: 26.420995712280273 and parameters: {'reset_noise_scale': 0.10682082413443178, 'forward_reward_weight': 0.7989113181227459, 'ctrl_cost_weight': 0.19978008718316212, 'healthy_reward': 1.462993075891991, 'contact_cost_weight': 0.00041043506651021174, 'healthy_z_lower': 0.17718626901276044, 'healthy_z_upper': 1.165401559515087, 'contact_force_min': -0.7128558918817364, 'contact_force_max': 0.7235204293948021, 'learning_rate': 0.000559799197164353, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9768050247554255, 'gae_lambda': 0.9137742390145112, 'clip_range': 0.3339413283781173, 'ent_coef': 0.003920447655083295, 'std_penalty_weight': 0.19467283114498724}. Best is trial 148 with value: 30.901912689208984.


Mean is: 29.707435607910156, Std is: 16.881864547729492



[I 2025-02-12 22:29:05,095] Trial 183 finished with value: 29.856714248657227 and parameters: {'reset_noise_scale': 0.10460856348135886, 'forward_reward_weight': 0.7494455948050255, 'ctrl_cost_weight': 0.19831852661863092, 'healthy_reward': 1.45925030263953, 'contact_cost_weight': 0.00041313777051933133, 'healthy_z_lower': 0.17713620317262102, 'healthy_z_upper': 1.164220659310702, 'contact_force_min': -0.6940561377900465, 'contact_force_max': 0.7208013408493789, 'learning_rate': 0.0005476277479484907, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9761054562840485, 'gae_lambda': 0.9339269868035811, 'clip_range': 0.33413944729320283, 'ent_coef': 0.004630138852783155, 'std_penalty_weight': 0.19454733419520231}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.74983215332031, Std is: 14.871020317077637



[I 2025-02-12 22:30:43,523] Trial 184 finished with value: 24.205501556396484 and parameters: {'reset_noise_scale': 0.10714435911868524, 'forward_reward_weight': 0.7525958373776882, 'ctrl_cost_weight': 0.20101471559004294, 'healthy_reward': 1.457307911106765, 'contact_cost_weight': 0.00040909394421849806, 'healthy_z_lower': 0.17828436977553408, 'healthy_z_upper': 1.1651329871531175, 'contact_force_min': -0.7000185583160068, 'contact_force_max': 0.7167335737368675, 'learning_rate': 0.0005448037141703497, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9766168039027073, 'gae_lambda': 0.9347767669788817, 'clip_range': 0.33082203980463404, 'ent_coef': 0.005765221632821985, 'std_penalty_weight': 0.19459393985779744}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.526630401611328, Std is: 17.066967010498047



[I 2025-02-12 22:32:21,157] Trial 185 finished with value: 23.568710327148438 and parameters: {'reset_noise_scale': 0.10484537149727319, 'forward_reward_weight': 0.7915853971314294, 'ctrl_cost_weight': 0.2382448197710986, 'healthy_reward': 1.4755566550988573, 'contact_cost_weight': 0.0005294878506170618, 'healthy_z_lower': 0.17114873481214193, 'healthy_z_upper': 1.1514637467354112, 'contact_force_min': -0.7154330242250595, 'contact_force_max': 0.7181666351707302, 'learning_rate': 0.0006080982564647457, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9778818055699526, 'gae_lambda': 0.932891105119942, 'clip_range': 0.3344782577793127, 'ent_coef': 0.003927825179152747, 'std_penalty_weight': 0.19094829839694535}. Best is trial 148 with value: 30.901912689208984.


Mean is: 26.47491455078125, Std is: 15.219853401184082



[I 2025-02-12 22:34:00,163] Trial 186 finished with value: 26.588184356689453 and parameters: {'reset_noise_scale': 0.099903556443095, 'forward_reward_weight': 0.7351442863298985, 'ctrl_cost_weight': 0.15140571021487106, 'healthy_reward': 1.433389214508877, 'contact_cost_weight': 0.0005064736787317567, 'healthy_z_lower': 0.17696592633918443, 'healthy_z_upper': 1.1636223709915092, 'contact_force_min': -0.6707953291366159, 'contact_force_max': 0.7313574484647524, 'learning_rate': 0.0005104146640646673, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9767859773274494, 'gae_lambda': 0.9369344433639804, 'clip_range': 0.3292527939299877, 'ent_coef': 0.002314404348847319, 'std_penalty_weight': 0.20874465444595242}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.331005096435547, Std is: 17.930137634277344



[I 2025-02-12 22:35:40,523] Trial 187 finished with value: 26.556835174560547 and parameters: {'reset_noise_scale': 0.10118898458029604, 'forward_reward_weight': 0.7728346242383533, 'ctrl_cost_weight': 0.14757191278792225, 'healthy_reward': 1.4365483324922963, 'contact_cost_weight': 0.0005100344257736574, 'healthy_z_lower': 0.1768464669317758, 'healthy_z_upper': 1.1612434596341432, 'contact_force_min': -0.6812385117193975, 'contact_force_max': 0.7447353928959181, 'learning_rate': 0.0005087875566512698, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9740300048210193, 'gae_lambda': 0.9383371506900574, 'clip_range': 0.33821882409692716, 'ent_coef': 0.007548258447232334, 'std_penalty_weight': 0.2108303008305789}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.153118133544922, Std is: 17.057716369628906



[I 2025-02-12 22:37:05,642] Trial 188 finished with value: 9.673972129821777 and parameters: {'reset_noise_scale': 0.10188571975209143, 'forward_reward_weight': 0.7006353942972952, 'ctrl_cost_weight': 0.1336675364521237, 'healthy_reward': 1.4344354482881143, 'contact_cost_weight': 0.0005069305672496718, 'healthy_z_lower': 0.1740529770225623, 'healthy_z_upper': 0.9708531253709696, 'contact_force_min': -0.6683270056762377, 'contact_force_max': 0.7328128081384729, 'learning_rate': 0.0005410954245407238, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9740580564870351, 'gae_lambda': 0.9375175250270675, 'clip_range': 0.3386203707308892, 'ent_coef': 0.007533686787921977, 'std_penalty_weight': 0.21063970039368973}. Best is trial 148 with value: 30.901912689208984.


Mean is: 12.6837797164917, Std is: 14.288890838623047



[I 2025-02-12 22:38:35,397] Trial 189 finished with value: 17.19009017944336 and parameters: {'reset_noise_scale': 0.10645420463195887, 'forward_reward_weight': 0.7365377323826525, 'ctrl_cost_weight': 0.1499804174826902, 'healthy_reward': 1.4295943620735796, 'contact_cost_weight': 0.0005813671923568544, 'healthy_z_lower': 0.16999331192133793, 'healthy_z_upper': 1.1611192309037042, 'contact_force_min': -0.7065707842972428, 'contact_force_max': 0.7308377865670255, 'learning_rate': 0.0005825317333966968, 'n_steps': 2048, 'batch_size': 2048, 'gamma': 0.9767860434637335, 'gae_lambda': 0.9383643718272311, 'clip_range': 0.3230107721666308, 'ent_coef': 0.005659343113469663, 'std_penalty_weight': 0.22242990014142908}. Best is trial 148 with value: 30.901912689208984.


Mean is: 21.098766326904297, Std is: 17.57262420654297



[I 2025-02-12 22:40:09,667] Trial 190 finished with value: 22.229686737060547 and parameters: {'reset_noise_scale': 0.10113957040293484, 'forward_reward_weight': 0.7808555213694536, 'ctrl_cost_weight': 0.15911216991221555, 'healthy_reward': 1.415782251223516, 'contact_cost_weight': 0.0005506335577402671, 'healthy_z_lower': 0.19102588293701697, 'healthy_z_upper': 1.1652211783713136, 'contact_force_min': -0.6799476667601395, 'contact_force_max': 0.7260283988272886, 'learning_rate': 0.0005300561282224076, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9746753779563108, 'gae_lambda': 0.93383882650985, 'clip_range': 0.32811134904433403, 'ent_coef': 0.010320925770146247, 'std_penalty_weight': 0.1781604512433682}. Best is trial 148 with value: 30.901912689208984.


Mean is: 25.431718826293945, Std is: 17.972742080688477



[I 2025-02-12 22:41:49,038] Trial 191 finished with value: 27.90216827392578 and parameters: {'reset_noise_scale': 0.09450712005445115, 'forward_reward_weight': 0.7154880367019896, 'ctrl_cost_weight': 0.17734486536529095, 'healthy_reward': 1.4465504247964682, 'contact_cost_weight': 0.00047033011867588583, 'healthy_z_lower': 0.1801074083760175, 'healthy_z_upper': 1.1553773657762134, 'contact_force_min': -0.6865635741137003, 'contact_force_max': 0.7477461386037985, 'learning_rate': 0.0004887001224293299, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9750460006940382, 'gae_lambda': 0.9292012832603068, 'clip_range': 0.3356190877594757, 'ent_coef': 0.003720519567283081, 'std_penalty_weight': 0.19756882581559837}. Best is trial 148 with value: 30.901912689208984.


Mean is: 31.205039978027344, Std is: 16.717571258544922



[I 2025-02-12 22:43:22,365] Trial 192 finished with value: 24.560298919677734 and parameters: {'reset_noise_scale': 0.09356321115890538, 'forward_reward_weight': 0.7597836390894326, 'ctrl_cost_weight': 0.1796570688473629, 'healthy_reward': 1.4502508206484006, 'contact_cost_weight': 0.00046793561492783266, 'healthy_z_lower': 0.17918144645179768, 'healthy_z_upper': 1.1506749559081735, 'contact_force_min': -0.6885391695272405, 'contact_force_max': 0.8579623332282886, 'learning_rate': 0.00047286778512449506, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9760277405591246, 'gae_lambda': 0.9461779933971255, 'clip_range': 0.3343521987402144, 'ent_coef': 0.004082859258473489, 'std_penalty_weight': 0.1962505138593354}. Best is trial 148 with value: 30.901912689208984.


Mean is: 27.8823184967041, Std is: 16.927438735961914



[I 2025-02-12 22:44:58,354] Trial 193 finished with value: 28.92969512939453 and parameters: {'reset_noise_scale': 0.10861632169479375, 'forward_reward_weight': 0.7148550985486882, 'ctrl_cost_weight': 0.15150095517617074, 'healthy_reward': 1.4408392553469447, 'contact_cost_weight': 0.00048586845182517073, 'healthy_z_lower': 0.195061841229799, 'healthy_z_upper': 1.1756042742365675, 'contact_force_min': -0.6769474464806459, 'contact_force_max': 0.7471020954814898, 'learning_rate': 0.000497994345861337, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9737351350815934, 'gae_lambda': 0.9287961661010405, 'clip_range': 0.3371642401772, 'ent_coef': 0.0032961960259097747, 'std_penalty_weight': 0.18625748058878147}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.26236343383789, Std is: 17.892803192138672



[I 2025-02-12 22:46:24,345] Trial 194 finished with value: 17.740697860717773 and parameters: {'reset_noise_scale': 0.10907100287210267, 'forward_reward_weight': 0.7196490685327687, 'ctrl_cost_weight': 0.13304902787506062, 'healthy_reward': 1.4334075865466998, 'contact_cost_weight': 0.00048714013263259176, 'healthy_z_lower': 0.2034295422738534, 'healthy_z_upper': 1.1553965858496709, 'contact_force_min': -0.7302778432360826, 'contact_force_max': 0.7468342779020238, 'learning_rate': 0.0005100973112484909, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9735900473601307, 'gae_lambda': 0.942320054806991, 'clip_range': 0.3377293213421603, 'ent_coef': 0.004820158409201004, 'std_penalty_weight': 0.18556169162728453}. Best is trial 148 with value: 30.901912689208984.


Mean is: 21.188987731933594, Std is: 18.582984924316406



[I 2025-02-12 22:47:59,795] Trial 195 finished with value: 27.778343200683594 and parameters: {'reset_noise_scale': 0.10395641267045033, 'forward_reward_weight': 0.7402955362859454, 'ctrl_cost_weight': 0.15563205884438905, 'healthy_reward': 1.4138758679513677, 'contact_cost_weight': 0.0005023737129590724, 'healthy_z_lower': 0.1956956212323202, 'healthy_z_upper': 1.1779064528327583, 'contact_force_min': -0.6630123914075874, 'contact_force_max': 0.7478744570009217, 'learning_rate': 0.0003447110168955559, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9744390792942547, 'gae_lambda': 0.9288794556061459, 'clip_range': 0.32954060040352995, 'ent_coef': 0.003017585588690237, 'std_penalty_weight': 0.17543318468421656}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.76872444152832, Std is: 17.045700073242188



[I 2025-02-12 22:49:35,396] Trial 196 finished with value: 27.78273582458496 and parameters: {'reset_noise_scale': 0.1037779861470379, 'forward_reward_weight': 0.6919994403183941, 'ctrl_cost_weight': 0.1498945908211451, 'healthy_reward': 1.4133696771376052, 'contact_cost_weight': 0.0005088069655287899, 'healthy_z_lower': 0.19810965981441464, 'healthy_z_upper': 1.1777416832564163, 'contact_force_min': -0.6558736749328539, 'contact_force_max': 0.7391829780177739, 'learning_rate': 0.0003507802305182531, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.974437097405011, 'gae_lambda': 0.9291139276098513, 'clip_range': 0.3287293763155357, 'ent_coef': 0.007522290747036471, 'std_penalty_weight': 0.17601024113853067}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.884563446044922, Std is: 17.62299919128418



[I 2025-02-12 22:51:12,527] Trial 197 finished with value: 29.266921997070312 and parameters: {'reset_noise_scale': 0.10379682881293358, 'forward_reward_weight': 0.6936921878112482, 'ctrl_cost_weight': 0.12109986279114612, 'healthy_reward': 1.3828784294103857, 'contact_cost_weight': 0.0005151169898166695, 'healthy_z_lower': 0.1969318680459329, 'healthy_z_upper': 1.186619785384994, 'contact_force_min': -0.6438888251371037, 'contact_force_max': 0.7453325692572104, 'learning_rate': 0.0003446448535289737, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9739308578812734, 'gae_lambda': 0.9293781233000411, 'clip_range': 0.47641373707870943, 'ent_coef': 0.007777716926399826, 'std_penalty_weight': 0.15791328031857324}. Best is trial 148 with value: 30.901912689208984.


Mean is: 31.813505172729492, Std is: 16.12646484375



[I 2025-02-12 22:52:47,160] Trial 198 finished with value: 26.999189376831055 and parameters: {'reset_noise_scale': 0.10413724784108185, 'forward_reward_weight': 0.6993877004647789, 'ctrl_cost_weight': 0.11812354312405135, 'healthy_reward': 1.365538979076472, 'contact_cost_weight': 0.0005329387382964924, 'healthy_z_lower': 0.19505528997590998, 'healthy_z_upper': 1.1841914352267344, 'contact_force_min': -0.6405593743453151, 'contact_force_max': 0.7669400267225046, 'learning_rate': 0.0003407366795318475, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9742504822883004, 'gae_lambda': 0.9285494035230145, 'clip_range': 0.46948103137881114, 'ent_coef': 0.008922749122977143, 'std_penalty_weight': 0.14971440133854577}. Best is trial 148 with value: 30.901912689208984.


Mean is: 29.5335750579834, Std is: 16.92813491821289



[I 2025-02-12 22:54:21,419] Trial 199 finished with value: 27.242700576782227 and parameters: {'reset_noise_scale': 0.10363167384940608, 'forward_reward_weight': 0.6933624237099222, 'ctrl_cost_weight': 0.12562378012335523, 'healthy_reward': 1.3915584825119784, 'contact_cost_weight': 0.0005696250722390702, 'healthy_z_lower': 0.1970903712023065, 'healthy_z_upper': 1.1848491889039514, 'contact_force_min': -0.6523668185942606, 'contact_force_max': 0.7699187082347616, 'learning_rate': 0.0003433335811645501, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9737390228138236, 'gae_lambda': 0.9286205371231927, 'clip_range': 0.45619802434369394, 'ent_coef': 0.00986534538107594, 'std_penalty_weight': 0.15510298657631527}. Best is trial 148 with value: 30.901912689208984.


Mean is: 29.959224700927734, Std is: 17.51432228088379



[I 2025-02-12 22:56:01,388] Trial 200 finished with value: 30.232635498046875 and parameters: {'reset_noise_scale': 0.1031493387880844, 'forward_reward_weight': 0.6881098771078412, 'ctrl_cost_weight': 0.12326807399345685, 'healthy_reward': 1.3720768688470932, 'contact_cost_weight': 0.0005582586385340102, 'healthy_z_lower': 0.19540829211773442, 'healthy_z_upper': 1.1999292774228123, 'contact_force_min': -0.6382809894768439, 'contact_force_max': 0.7525962630594597, 'learning_rate': 0.00030372486033285965, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.973667104237666, 'gae_lambda': 0.9280091517473548, 'clip_range': 0.47843471568040813, 'ent_coef': 0.009568683872128569, 'std_penalty_weight': 0.14565369486856625}. Best is trial 148 with value: 30.901912689208984.


Mean is: 32.535057067871094, Std is: 15.807500839233398



[I 2025-02-12 22:57:39,474] Trial 201 finished with value: 26.015905380249023 and parameters: {'reset_noise_scale': 0.10387847528453609, 'forward_reward_weight': 0.6948529151265218, 'ctrl_cost_weight': 0.12005504115364682, 'healthy_reward': 1.370188437623166, 'contact_cost_weight': 0.0005621993801531815, 'healthy_z_lower': 0.19653518561584848, 'healthy_z_upper': 1.1914459555993167, 'contact_force_min': -0.6338911411696859, 'contact_force_max': 0.7664857942472068, 'learning_rate': 0.0003366893196002551, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9736678690150472, 'gae_lambda': 0.9282891782192876, 'clip_range': 0.4779694160406078, 'ent_coef': 0.012932104798041133, 'std_penalty_weight': 0.14268215731988776}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.70638656616211, Std is: 18.85646629333496



[I 2025-02-12 22:59:17,589] Trial 202 finished with value: 26.1997127532959 and parameters: {'reset_noise_scale': 0.10949308238111614, 'forward_reward_weight': 0.7154822350006617, 'ctrl_cost_weight': 0.12140839990765245, 'healthy_reward': 1.3838917414781204, 'contact_cost_weight': 0.0005375921472379695, 'healthy_z_lower': 0.19424154084010514, 'healthy_z_upper': 1.1990941146678453, 'contact_force_min': -0.6520767484352028, 'contact_force_max': 0.750374635698943, 'learning_rate': 0.00028989244020340057, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9742497427227536, 'gae_lambda': 0.9279392245951591, 'clip_range': 0.47297334283096754, 'ent_coef': 0.010218498694027096, 'std_penalty_weight': 0.15167751787079325}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.90365219116211, Std is: 17.826900482177734



[I 2025-02-12 23:00:50,574] Trial 203 finished with value: 21.080018997192383 and parameters: {'reset_noise_scale': 0.1040329962063381, 'forward_reward_weight': 0.6735722338542094, 'ctrl_cost_weight': 0.13397298493059273, 'healthy_reward': 1.3926827015012797, 'contact_cost_weight': 0.0005240658611234891, 'healthy_z_lower': 0.19769131494761094, 'healthy_z_upper': 1.1857689758435086, 'contact_force_min': -0.6395525520894263, 'contact_force_max': 0.7661095142111592, 'learning_rate': 0.00031372398626234297, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9737810664135047, 'gae_lambda': 0.9295821477303201, 'clip_range': 0.47293207510081553, 'ent_coef': 0.009079696166846653, 'std_penalty_weight': 0.16128777016587997}. Best is trial 148 with value: 30.901912689208984.


Mean is: 24.152820587158203, Std is: 19.051671981811523



[I 2025-02-12 23:02:28,475] Trial 204 finished with value: 28.26485252380371 and parameters: {'reset_noise_scale': 0.10596803592354716, 'forward_reward_weight': 0.6557505526022134, 'ctrl_cost_weight': 0.11453420286505651, 'healthy_reward': 1.3675399192066753, 'contact_cost_weight': 0.0005441536606048356, 'healthy_z_lower': 0.2079398039908357, 'healthy_z_upper': 1.1870958044879274, 'contact_force_min': -0.6620442786747407, 'contact_force_max': 0.7436931273353808, 'learning_rate': 0.00034577722543375545, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.97326853943291, 'gae_lambda': 0.9319307063401324, 'clip_range': 0.48494678856067935, 'ent_coef': 0.007258360622451624, 'std_penalty_weight': 0.13774974155239378}. Best is trial 148 with value: 30.901912689208984.


Mean is: 30.720243453979492, Std is: 17.825008392333984



[I 2025-02-12 23:04:02,988] Trial 205 finished with value: 26.404130935668945 and parameters: {'reset_noise_scale': 0.1069294927378817, 'forward_reward_weight': 0.64375606564558, 'ctrl_cost_weight': 0.16354769019782134, 'healthy_reward': 1.409111673307006, 'contact_cost_weight': 0.0005691066418683644, 'healthy_z_lower': 0.205886019422266, 'healthy_z_upper': 1.1999023237246431, 'contact_force_min': -0.6598851444802195, 'contact_force_max': 0.74134712828774, 'learning_rate': 0.00036277666501130306, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9739390214479902, 'gae_lambda': 0.930897884731106, 'clip_range': 0.4579414308187929, 'ent_coef': 0.006806062847790959, 'std_penalty_weight': 0.10726180382508049}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.410146713256836, Std is: 18.702049255371094



[I 2025-02-12 23:05:36,542] Trial 206 finished with value: 25.78643035888672 and parameters: {'reset_noise_scale': 0.11363405960496634, 'forward_reward_weight': 0.6532737274304571, 'ctrl_cost_weight': 0.1313728476643531, 'healthy_reward': 1.3726897043561155, 'contact_cost_weight': 0.0005511399748709426, 'healthy_z_lower': 0.19038253926035162, 'healthy_z_upper': 1.1901249434359422, 'contact_force_min': -0.6647768348139104, 'contact_force_max': 0.7548419487433393, 'learning_rate': 0.0002736453787602979, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9733920287037915, 'gae_lambda': 0.9266441263277047, 'clip_range': 0.4913114123346966, 'ent_coef': 0.006002670544466264, 'std_penalty_weight': 0.12324476233908144}. Best is trial 148 with value: 30.901912689208984.


Mean is: 28.044198989868164, Std is: 18.319379806518555



[W 2025-02-12 23:05:55,460] Trial 207 failed with parameters: {'reset_noise_scale': 0.09882745989657987, 'forward_reward_weight': 0.680277644968093, 'ctrl_cost_weight': 0.1490959442918867, 'healthy_reward': 1.4093760313974195, 'contact_cost_weight': 0.000488185341807725, 'healthy_z_lower': 0.20905480903441637, 'healthy_z_upper': 1.178642228071086, 'contact_force_min': -0.6453245438172286, 'contact_force_max': 0.751388572225389, 'learning_rate': 0.00030861786742803973, 'n_steps': 2048, 'batch_size': 512, 'gamma': 0.9732543352306958, 'gae_lambda': 0.932294006945299, 'clip_range': 0.4878404506064939, 'ent_coef': 0.011189992706647924, 'std_penalty_weight': 0.16997855031576461} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "/Users/ignazioemanuelepicciche/Documents/Ignazio PC/ucbm/deep_learning/Reinforcement_Learning_MuJoCu/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)


KeyboardInterrupt: 