# Imports

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy import signal
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
import time
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from math import sqrt
import matplotlib.pyplot as plt
import joblib
import os


2025-01-30 15:22:42.545052: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-30 15:22:42.547502: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-30 15:22:42.575648: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-30 15:22:42.575675: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-30 15:22:42.576573: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

In [18]:
scalers_dir = 'scalers'
rl_models_dir = 'RL_models'

# Additional functions


In [4]:
def inverse_scale_pos(scaler_pos, scaledPos):

  pos = scaler_pos.inverse_transform(scaledPos)

  return pos

In [5]:
def inverse_scale_forces(scaler_fx, scaler_fy, scaled_fx, scaled_fy):
  force_x = scaler_fx.inverse_transform(scaled_fx)
  force_y = scaler_fy.inverse_transform(scaled_fy)

  return force_x, force_y

In [6]:
def create_pos_motion_list(pos_cmd):
  pos_motion_list = []
  index = 1
  for i in range(len(pos_cmd)):
    if i == 0:
      new_value = 0.0
      pos_motion_list.append(new_value)
      continue
    if (pos_cmd[i - 1]) != (pos_cmd[i]):
      new_value = 0.0
      pos_motion_list.append(new_value)
      index = 1
    else:
      new_value = index / 100
      pos_motion_list.append(new_value)
      index += 1

  return pos_motion_list


In [7]:
def preprocess_pred_data(dataset, n):

  window = 100

  def make_dataset(dataset, look_back=1):
      dataX = []
      for i in range(len(dataset) - look_back - 1):
          xset = []
          for j in range(n):
              input = dataset[i:(i + look_back), j]
              xset.append(input)


          dataX.append(xset)

      return np.array(dataX)

  look_back = window
  predX = make_dataset(dataset, look_back)

  return predX

# RL inverse model

In [8]:
import argparse
import os
import gym
from gym import spaces
from stable_baselines3 import PPO
from sb3_contrib import RecurrentPPO
from stable_baselines3.common.vec_env import DummyVecEnv

In [9]:
import os
# Check the current directory
os.getcwd()

'/home/hama6767/robosoft/ML-soft-fin-motion-RoboSoft2025/surrogate_model'

In [10]:
lstm_forcenet = tf.keras.models.load_model('pretrained_models/lstm_force_net.keras')
cnn_posnet = tf.keras.models.load_model('pretrained_models/cnn_pos_net.keras')

In [11]:
print(lstm_forcenet.input_shape)
print(cnn_posnet.input_shape)

(None, 3, 100)
(None, 2, 100)


## Define training env and parameters

In [14]:
scaler_pos = joblib.load(os.path.join(scalers_dir,'scaler_pos.pkl'))
scaler_predicted_pos = MinMaxScaler(feature_range=(-1.57, 1.57))
scaler_pos_cmd = joblib.load(os.path.join(scalers_dir,'scaler_pos_cmd.pkl'))
scaler_ang_vel = joblib.load(os.path.join(scalers_dir,'scaler_ang_vel.pkl'))
scaler_fx = joblib.load(os.path.join(scalers_dir,'scaler_fx.pkl'))
scaler_fy = joblib.load(os.path.join(scalers_dir,'scaler_fy.pkl'))

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [15]:
def plot_returns(returns, window_size=100):
    plt.figure(figsize=(12, 6))
    plt.plot(returns, label='Episode Return')

    if len(returns) >= window_size:
        rolling_mean = pd.Series(returns).rolling(window=window_size).mean()
        plt.plot(rolling_mean, label=f'Rolling Mean (window={window_size})', color='orange')

    plt.xlabel('Episode')
    plt.ylabel('Return')
    plt.title('Episode Returns Over Time')
    plt.legend()
    plt.show()

In [16]:
class LearningFinMovement(gym.Env):
    def __init__(self, show_plot=False):
        super(LearningFinMovement, self).__init__()
        self.show_plot = show_plot
        self.max_steps = 250

        self.posnet = cnn_posnet  # Load posnet
        self.lstm_forcenet = lstm_forcenet  # Load forcenet
        self.current_step = 0
        self.observation_dimension = 5
        self.keep_cmd_history = 3
        self.force_cmd_freq = 30
        self.current_episode_return = 0  # Track the return for the current episode
        self.episode_returns = []

        self.observation_dimension = self.observation_dimension + self.keep_cmd_history * 2

        self.action_space = spaces.Box(low=np.array([-1.0, -1.0]),
                                        high=np.array([1.0, 1.0]), dtype=np.float16) # [angle, angular_vel]
        self.oscillation_length = 5

        self.observation_space = spaces.Box(low=-float("inf"),
                                            high=float("inf"),
                                            shape=(self.observation_dimension,),
                                            dtype=np.float16)

        self.command_hz = 30  # original actuator and sensor Hz
        self.window = 100  # original actuator and sensor Hz
        self.buffer_length = 500
        self.reward_calculate_length = self.command_hz * self.oscillation_length
        self.timesteps = 0

        self.target_force_x_init = 2.0
        self.target_force_x = 2.0 # Set a random or predefined value
        self.target_force_y = 0
        self.target_force_y_init = 0
        self.target_range = 0

        #### PARAMETERS ####
        self.weight_x = 1.0
        self.weight_y = 1.0
        self.weight_c = 0.2

        self.cmd_pos_buffer = np.zeros(self.buffer_length)
        self.cmd_vel_buffer = np.zeros(self.buffer_length)
        self.pos_motion_list_buffer = np.zeros(self.buffer_length)

        self.pos_cmd_history = np.zeros(self.keep_cmd_history)
        self.ang_vel_history = np.zeros(self.keep_cmd_history)

    def pred_PosNet(self, df, cmd_pos, cmd_vel, pos_motion_list):

        scaled_cmd_pos = scaler_pos_cmd.transform(np.array(cmd_pos).reshape(-1, 1))
        scaled_cmd_vel = scaler_ang_vel.transform(np.array(cmd_vel).reshape(-1, 1))
        pos_motion_list_scaled = np.array(pos_motion_list).reshape(-1, 1)

        # Stack the features together along the "channel" axis (axis=1)
        dataset = np.hstack([scaled_cmd_pos, scaled_cmd_vel, pos_motion_list_scaled])
        scaled_features = preprocess_pred_data(dataset, 2)


        # Predict with posnet
        predictions = self.posnet.predict(scaled_features)
        position =[predictions[:, 0]]

        posPredict = inverse_scale_pos(scaler_pos, position)

        df_pred = pd.DataFrame()
        df_pred['cmd_pos'] = cmd_pos
        df_pred['cmd_vel'] = cmd_vel
        df_pred['pos_motion_list'] = pos_motion_list

        posPredict_full = np.concatenate((np.full((self.window + 1,), 0), posPredict.flatten()))

        df_pred['position'] = posPredict_full

        if self.show_plot:
          plt.figure(figsize=(21, 7))
          plt.plot(df_pred['cmd_pos'], label='cmd_pos')
          plt.plot(df_pred['position'], label='position')
          plt.legend()
          plt.show()



        return df_pred

    def pred_ForceNet(self, df, position, cmd_pos, cmd_vel, pos_motion_list):
        scaled_position = scaler_pos.transform(np.array(position).reshape(-1, 1)) # share the scaler
        scaled_cmd_pos = scaler_pos_cmd.transform(np.array(cmd_pos).reshape(-1, 1))
        scaled_cmd_vel = scaler_ang_vel.transform(np.array(cmd_vel).reshape(-1, 1))
        pos_motion_list = np.array(pos_motion_list).reshape(-1, 1)

        dataset = np.hstack((scaled_position, scaled_cmd_pos, scaled_cmd_vel))
        scaled_features = preprocess_pred_data(dataset, 3)

        predictions = self.lstm_forcenet.predict(scaled_features)
        force_x = [predictions[:, 0]]
        force_y = [predictions[:, 1]]
        force_x, force_y = inverse_scale_forces(scaler_fx, scaler_fy, force_x, force_y)

        fx_predict_full = np.concatenate((np.full((self.window + 1,), 0), force_x.flatten()))
        fy_predict_full = np.concatenate((np.full((self.window + 1,), 0), force_y.flatten()))


        df_final = pd.DataFrame()
        df_final['position'] = position
        df_final['force_x'] = fx_predict_full
        df_final['force_y'] = fy_predict_full


        fx_average = np.mean(df_final['force_x'][-self.reward_calculate_length:])
        fy_average = np.mean(df_final['force_y'][-self.reward_calculate_length:])
        print("fx_average: ", fx_average)
        print("fy_average: ", fy_average)
        if self.show_plot:
          plt.figure(figsize=(21, 7))
          plt.plot(df_final['force_y'], label='fy')
          plt.axhline(y=self.target_force_x, color='r', linestyle='--', label='target_force_x')
          plt.plot(df_final['force_x'], label='fx')
          plt.axhline(y=fx_average, color='g', label='fx average')
          plt.axhline(y=self.target_force_y, color='b', linestyle='--', label='target_force_y')
          plt.axhline(y=fy_average, color='y', label='fy average')
          plt.legend()
          plt.show()


        return df_final

    def generate_oscillation(self, phase, amp, zd):
        next_pos = zd + amp/2 * self.phase
        if next_pos > 1.57:
            next_pos = 1.57
        elif next_pos < -1.57:
            next_pos = -1.57
        self.phase *= -1
        return next_pos

    def motor_forward_model(self, cmd_pos, cmd_vel, pos_motion_list):
        df_tmp = pd.DataFrame(columns=['cmd_pos', 'cmd_vel', 'position', 'pos_motion_list'])

        _cmd_pos = cmd_pos
        _cmd_vel = cmd_vel
        _pos_motion_list = pos_motion_list

        df_tmp['cmd_pos'] = _cmd_pos
        df_tmp['cmd_vel'] = _cmd_vel
        df_tmp['pos_motion_list'] = _pos_motion_list
        df_pred = self.pred_PosNet(df_tmp, df_tmp['cmd_pos'], df_tmp['cmd_vel'], df_tmp['pos_motion_list'])

        df_final = self.pred_ForceNet(df_pred, df_pred['position'], df_pred['cmd_pos'], df_pred['cmd_vel'], df_pred['pos_motion_list'])
        df_final = df_final.iloc[- self.reward_calculate_length:]

        return df_final

    def reset(self):
        self.timesteps = 0
        self.current_step = 0
        self.current_episode_return = 0
        self.reward_list = []
        self.cmd_pos_buffer = np.zeros(self.buffer_length)
        self.cmd_vel_buffer = np.zeros(self.buffer_length)
        self.pos_motion_list_buffer = np.zeros(self.buffer_length)
        obs = [0, self.target_force_x, self.target_force_y, 0, 0]
        if self.keep_cmd_history > 0:
          obs = np.array(obs)
          obs = np.concatenate((obs, self.pos_cmd_history, self.ang_vel_history))
          obs = obs.flatten()
        print("-----------RESET!!!!!!!!!!!!!-----------")
        return obs

    def step(self, action):
        start_time = time.perf_counter()

        self.current_step += 1

        _action_pos, _action_vel = action
        action_pos = _action_pos * 1.20
        action_vel = (_action_vel + 1.0)* 1.0 + 1.0
        print("action_pos: ", action_pos)
        print("action_vel: ", action_vel)
        self.pos_cmd_history = np.append(self.pos_cmd_history, action_pos)
        self.ang_vel_history = np.append(self.ang_vel_history, action_vel)

        for i in range(self.command_hz):
            self.cmd_pos_buffer = np.append(self.cmd_pos_buffer, action_pos)
            self.cmd_vel_buffer = np.append(self.cmd_vel_buffer, action_vel)

        self.cmd_pos_buffer = self.cmd_pos_buffer[-self.buffer_length:]
        self.cmd_vel_buffer = self.cmd_vel_buffer[-self.buffer_length:]

        self.pos_motion_list_buffer = create_pos_motion_list(self.cmd_pos_buffer)

        df_tmp = self.motor_forward_model(self.cmd_pos_buffer, self.cmd_vel_buffer, self.pos_motion_list_buffer)
        self.df_tmp = df_tmp

        self.pos_cmd_history = self.pos_cmd_history[-self.keep_cmd_history:]
        self.ang_vel_history = self.ang_vel_history[-self.keep_cmd_history:]

        # Calculate average force
        average_force_x = np.mean(df_tmp['force_x'])
        average_force_y = np.mean(df_tmp['force_y'])
        latest_force_x = np.mean(df_tmp['force_x'][-self.command_hz:-1])

        episode_done = self.current_step == self.max_steps
        reward = self.reward_function_s(average_force_x, average_force_y, self.target_force_x, self.target_force_y)
        self.current_episode_return += reward

        if episode_done:
            self.episode_returns.append(self.current_episode_return)
            self.current_episode_return = 0

        # Update target force before action to reflect on the observation
        if self.timesteps % self.force_cmd_freq == 0:
            self.target_force_x = self.target_force_x_init + self.target_range * np.random.uniform(-1, 1)
            self.target_force_y = self.target_force_y_init + self.target_range * np.random.uniform(-1, 1)


        latest_pos = df_tmp['position'].iloc[-1]
        # First, observe the environment
        observation = [latest_pos, self.target_force_x, self.target_force_y, action[0], action[1]]
        if self.keep_cmd_history > 0:
            observation = np.concatenate((observation, self.pos_cmd_history, self.ang_vel_history))
            observation = observation.flatten()

        print("observation: ", observation)

        self.timesteps += 1
        print("timesteps: ", self.timesteps)

        info = {}

        return observation, reward, episode_done, info

    def reward_function(self, force_x, force_y, target_force_x, target_force_y):
        reward = 0.0
        valid_error = 2.0

        force_error_x = force_x - target_force_x
        force_error_y = force_y - target_force_y

        reward_x = self.weight_x * -abs(force_error_x)
        reward_y = self.weight_y * -abs(force_error_y)

        reward += 4 + reward_x + reward_y

        print("target_force_x: ", target_force_x)
        print("target_force_y: ", target_force_y)
        print("force_error_x: ", force_error_x)
        print("force_error_y: ", force_error_y)
        print("REWARD: ", reward)
        print("--------")

        return reward

    def reward_function_s(self, force_x, force_y, target_force_x, target_force_y):
        reward = 0.0

        force_error_x = np.mean(force_x) - target_force_x
        force_error_y = np.mean(force_y) - target_force_y
        reward_x = self.weight_x * -abs(force_error_x)
        reward_y = self.weight_y * -abs(force_error_y)

        lambda_smoothness = 0.10
        force_x_diff = np.diff(self.df_tmp['force_x'][-self.reward_calculate_length:])
        force_y_diff = np.diff(self.df_tmp['force_y'][-self.reward_calculate_length:])
        smoothness_x = np.sum(force_x_diff**2)/len(force_x_diff)
        smoothness_y = np.sum(force_y_diff**2)/len(force_y_diff)
        sobolev_term = (lambda_smoothness * (smoothness_x + smoothness_y))**0.5

        reward = reward_x + reward_y - sobolev_term
        print("target_force_x: ", target_force_x)
        print("target_force_y: ", target_force_y)
        print("force_error_x: ", force_error_x)
        print("force_error_y: ", force_error_y)
        print("sobolev_term: ", sobolev_term)
        print("REWARD: ", reward)
        print("--------")

        return reward

## Agent Training

In [None]:
import warnings
from stable_baselines3.common.callbacks import CheckpointCallback
from stable_baselines3 import A2C
from stable_baselines3 import PPO
from sb3_contrib import RecurrentPPO

# Define the environment
env = LearningFinMovement(False)


# Path where the checkpoints are saved
checkpoint_dir = rl_models_dir
checkpoint_prefix = 'y_ppo_fin_movement'

def find_latest_checkpoint(directory, prefix):
    checkpoints = [f for f in os.listdir(directory) if f.startswith(prefix)]

    if not checkpoints:
        return None
    checkpoints.sort(key=lambda x: int(x.split('_')[4].split('.')[0]), reverse=True)
    return os.path.join(directory, checkpoints[0])

latest_checkpoint = find_latest_checkpoint(checkpoint_dir, checkpoint_prefix)

if latest_checkpoint:
    print(f"Loading model from checkpoint: {latest_checkpoint}")
    model = RecurrentPPO.load(latest_checkpoint, env=env, n_steps=32, gamma=0.8) # ent_coef=0.001
else:
    print("No checkpoint found. Starting from scratch.")
    model = RecurrentPPO("MlpLstmPolicy", env, verbose=1, n_steps=32, gamma=0.8)
    policy_kwargs = dict(
                lstm_hidden_size=64,
                n_lstm_layers=3,
                net_arch=[64]
            )


checkpoint_callback = CheckpointCallback(save_freq=1000, save_path=checkpoint_dir, name_prefix=checkpoint_prefix)
warnings.filterwarnings("ignore", message="X does not have valid feature names")
model.learn(total_timesteps=5000, callback=checkpoint_callback)  # Adjust the total timesteps as needed
plot_returns(env.episode_returns)

model.save("final_model_ppo_fin_movement")

No checkpoint found. Starting from scratch.
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-----------RESET!!!!!!!!!!!!!-----------
action_pos:  -0.29615428447723385
action_vel:  1.77028027176857




fx_average:  0.7304031928504545
fy_average:  -0.39518607911206793
target_force_x:  2.0
target_force_y:  0
force_error_x:  -1.2695968071495454
force_error_y:  -0.39518607911206793
sobolev_term:  0.10410959759505904
REWARD:  -1.7688924838566722
--------
observation:  [-0.14656989  2.          0.         -0.24679524 -0.22971973  0.
  0.         -0.29615428  0.          0.          1.77028027]
timesteps:  1
action_pos:  0.1996587038040161
action_vel:  1.0
fx_average:  0.8059635022658024
fy_average:  -0.5646425920303431
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1940364977341975
force_error_y:  -0.5646425920303431
sobolev_term:  0.1519176000422859
REWARD:  -1.9105966898068265
--------
observation:  [ 0.04527545  2.          0.          0.16638225 -1.          0.
 -0.29615428  0.1996587   0.          1.77028027  1.        ]
timesteps:  2
action_pos:  -1.2
action_vel:  2.312578409910202
fx_average:  0.8017296626841637
fy_average:  0.22232043860609835
target_force_x:  2.0
tar

fx_average:  2.780629588985239
fy_average:  -0.6626616040043584
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.780629588985239
force_error_y:  -0.6626616040043584
sobolev_term:  0.41567669853032657
REWARD:  -1.8589678915199241
--------
observation:  [-0.24930018  2.          0.          0.906865   -0.5122816   0.62921126
 -1.2         1.088238    2.43255094  1.60577375  1.4877184 ]
timesteps:  16
action_pos:  -1.2
action_vel:  2.102526903152466
fx_average:  3.3417734142037534
fy_average:  -0.5566176549573231
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3417734142037534
force_error_y:  -0.5566176549573231
sobolev_term:  0.4595798225644103
REWARD:  -2.357970891725487
--------
observation:  [-0.16921186  2.          0.         -1.          0.1025269  -1.2
  1.088238   -1.2         1.60577375  1.4877184   2.1025269 ]
timesteps:  17
action_pos:  -0.049234552681446074
action_vel:  1.0
fx_average:  3.573119146173805
fy_average:  -0.1770876428398841
target_force_x: 

fx_average:  2.0518504983211994
fy_average:  -0.9780850308507107
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.05185049832119937
force_error_y:  -0.9780850308507107
sobolev_term:  0.2996535474716668
REWARD:  -1.329589076643577
--------
observation:  [ 0.47209617  2.          0.          0.4890404   1.         -1.2
  0.64543405  0.58684849  1.42440343  2.94837606  3.        ]
timesteps:  31
action_pos:  0.12658288478851318
action_vel:  3.0
fx_average:  2.6670145581550706
fy_average:  -1.0418637256157828
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6670145581550706
force_error_y:  -1.0418637256157828
sobolev_term:  0.3378550359809059
REWARD:  -2.0467333197517594
--------
observation:  [0.14169997 2.         0.         0.10548574 1.         0.64543405
 0.58684849 0.12658288 2.94837606 3.         3.        ]
timesteps:  32
---------------------------
| time/              |    |
|    fps             | 6  |
|    iterations      | 1  |
|    time_elapsed    | 5  |


fx_average:  1.635632648010787
fy_average:  -2.407821069545371
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.364367351989213
force_error_y:  -2.407821069545371
sobolev_term:  0.2371091846293835
REWARD:  -3.0092976061639676
--------
observation:  [0.74692457 2.         0.         1.         0.15495774 0.53850242
 0.14847497 1.2        1.72109818 3.         2.15495774]
timesteps:  45
action_pos:  0.5083053588867187
action_vel:  2.1541787683963776
fx_average:  2.286652267220252
fy_average:  -1.8700169132037157
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.28665226722025183
force_error_y:  -1.8700169132037157
sobolev_term:  0.276361316657857
REWARD:  -2.433030497081824
--------
observation:  [0.40370454 2.         0.         0.4235878  0.15417877 0.14847497
 1.2        0.50830536 3.         2.15495774 2.15417877]
timesteps:  46
action_pos:  1.2
action_vel:  3.0
fx_average:  -0.03328131274398215
fy_average:  -1.7370564652289393
target_force_x:  2.0
target_force_

fx_average:  1.2509740211836535
fy_average:  -0.6207290268415993
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7490259788163465
force_error_y:  -0.6207290268415993
sobolev_term:  0.30521240241648706
REWARD:  -1.6749674080744328
--------
observation:  [0.96359576 2.         0.         1.         1.         1.2
 1.2        1.2        1.29210764 2.09544683 3.        ]
timesteps:  60
action_pos:  -0.592511773109436
action_vel:  3.0
fx_average:  2.5311276239296916
fy_average:  -2.023710493681356
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5311276239296916
force_error_y:  -2.023710493681356
sobolev_term:  0.2994546480061505
REWARD:  -2.854292765617198
--------
observation:  [-0.02339226  2.          0.         -0.49375981  1.          1.2
  1.2        -0.59251177  2.09544683  3.          3.        ]
timesteps:  61
action_pos:  0.0783422827720642
action_vel:  2.9309921264648438
fx_average:  1.27549641491603
fy_average:  -1.2201431370369062
target_force_x:  2.0
t

fx_average:  1.6139499582451566
fy_average:  -1.5906669713825181
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.38605004175484336
force_error_y:  -1.5906669713825181
sobolev_term:  0.5458690940860225
REWARD:  -2.522586107223384
--------
observation:  [ 0.45532595  2.          0.          0.5649842   1.         -1.2
  0.40903405  0.67798104  2.653925    1.5749681   3.        ]
timesteps:  73
action_pos:  -0.4354647159576416
action_vel:  3.0
fx_average:  2.243125139494549
fy_average:  -0.6498900647263014
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.24312513949454884
force_error_y:  -0.6498900647263014
sobolev_term:  0.529927497435624
REWARD:  -1.4229427016564742
--------
observation:  [-0.34736663  2.          0.         -0.36288726  1.          0.40903405
  0.67798104 -0.43546472  1.5749681   3.          3.        ]
timesteps:  74
action_pos:  -1.2
action_vel:  2.5152111053466797
fx_average:  2.3008130149410793
fy_average:  -0.5210779598306206
target_force_x

fx_average:  2.3013904791807014
fy_average:  -0.7538191785292624
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3013904791807014
force_error_y:  -0.7538191785292624
sobolev_term:  0.31739730592409615
REWARD:  -1.37260696363406
--------
observation:  [-0.34490969  2.          0.         -0.28167439 -1.          0.62828915
  0.33686374 -0.33800926  1.28410977  1.73784512  1.        ]
timesteps:  88
action_pos:  1.2
action_vel:  2.5714078545570374
fx_average:  1.4712579583835657
fy_average:  -1.7088161076329091
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5287420416164343
force_error_y:  -1.7088161076329091
sobolev_term:  0.32773690253421506
REWARD:  -2.5652950517835587
--------
observation:  [ 0.71708954  2.          0.          1.          0.57140785  0.33686374
 -0.33800926  1.2         1.73784512  1.          2.57140785]
timesteps:  89
action_pos:  -1.2
action_vel:  1.457820177078247
fx_average:  1.8610584385361195
fy_average:  -1.5002045511392657
target_f

fx_average:  2.543611474346565
fy_average:  -1.9687891715376384
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.543611474346565
force_error_y:  -1.9687891715376384
sobolev_term:  0.40964919182586906
REWARD:  -2.9220498377100723
--------
observation:  [-0.24710309  2.          0.         -0.97415781  0.19065201 -0.10421084
  1.08419759 -1.16898937  1.56447077  2.32073843  2.19065201]
timesteps:  101
action_pos:  0.21553938388824462
action_vel:  2.693081855773926
fx_average:  3.2728400113785416
fy_average:  -2.1331380455739026
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.2728400113785416
force_error_y:  -2.1331380455739026
sobolev_term:  0.4607596069187606
REWARD:  -3.8667376638712048
--------
observation:  [ 0.08565551  2.          0.          0.17961615  0.69308186  1.08419759
 -1.16898937  0.21553938  2.32073843  2.19065201  2.69308186]
timesteps:  102
action_pos:  -1.2
action_vel:  1.6703845262527466
fx_average:  2.797657341033125
fy_average:  0.06240965529

fx_average:  3.007534970614444
fy_average:  -2.3666301173258573
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.007534970614444
force_error_y:  -2.3666301173258573
sobolev_term:  0.34445451558254947
REWARD:  -3.7186196035228507
--------
observation:  [ 0.62590158  2.          0.          0.68019062 -0.08081468 -1.06470366
  1.11309822  0.81622875  1.          1.25129449  1.91918532]
timesteps:  115
action_pos:  1.2
action_vel:  1.6419862508773804
fx_average:  1.3190066757397034
fy_average:  -1.9600710608504925
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6809933242602966
force_error_y:  -1.9600710608504925
sobolev_term:  0.33477743957714856
REWARD:  -2.9758418246879375
--------
observation:  [ 0.97345163  2.          0.          1.         -0.35801375  1.11309822
  0.81622875  1.2         1.25129449  1.91918532  1.64198625]
timesteps:  116
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.2973983184582285
fy_average:  -0.19945637157316873
target_force_x:  2.

action_pos:  -0.034571439027786255
action_vel:  2.097088612616062
fx_average:  1.5349509452971346
fy_average:  -0.4828559982235517
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4650490547028654
force_error_y:  -0.4828559982235517
sobolev_term:  0.5032428313012677
REWARD:  -1.4511478842276846
--------
observation:  [-0.07203232  2.          0.         -0.02880953  0.09708861 -0.16566934
 -0.06728171 -0.03457144  3.          1.          2.09708861]
timesteps:  129
action_pos:  -0.17098633646965025
action_vel:  1.0
fx_average:  1.524742608807063
fy_average:  -0.09219767312997093
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.475257391192937
force_error_y:  -0.09219767312997093
sobolev_term:  0.46510849462958354
REWARD:  -1.0325635589524915
--------
observation:  [-0.13881939  2.          0.         -0.14248861 -1.         -0.06728171
 -0.03457144 -0.17098634  1.          2.09708861  1.        ]
timesteps:  130
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.

fx_average:  3.185916980076791
fy_average:  -0.7192781787339464
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.185916980076791
force_error_y:  -0.7192781787339464
sobolev_term:  0.4999124293144428
REWARD:  -2.40510758812518
--------
observation:  [0.31493748 2.         0.         0.31575596 0.48226196 1.2
 1.2        0.37890716 2.25379515 1.90650663 2.48226196]
timesteps:  144
action_pos:  1.1214845180511475
action_vel:  2.577394485473633
fx_average:  1.8321419226552693
fy_average:  -1.6948081028899162
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.16785807734473068
force_error_y:  -1.6948081028899162
sobolev_term:  0.46086814399908055
REWARD:  -2.3235343242337274
--------
observation:  [0.85751179 2.         0.         0.93457043 0.57739449 1.2
 0.37890716 1.12148452 1.90650663 2.48226196 2.57739449]
timesteps:  145
action_pos:  -0.7584634780883789
action_vel:  3.0
fx_average:  2.3260642607565525
fy_average:  -2.8232847683229676
target_force_x:  2.0
target_f

fx_average:  1.6999549844700335
fy_average:  -0.07229020514345978
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.30004501552996654
force_error_y:  -0.07229020514345978
sobolev_term:  0.45473729383811884
REWARD:  -0.8270725145115452
--------
observation:  [-1.29232012  2.          0.         -1.          1.          0.50893475
 -1.2        -1.2         1.          2.66986507  3.        ]
timesteps:  159
action_pos:  1.2
action_vel:  1.5434549450874329
fx_average:  3.1638259615861273
fy_average:  -0.18557446182684903
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.1638259615861273
force_error_y:  -0.18557446182684903
sobolev_term:  0.5322927047730862
REWARD:  -1.8816931281860625
--------
observation:  [ 0.2138123   2.          0.          1.         -0.45654505 -1.2
 -1.2         1.2         2.66986507  3.          1.54345495]
timesteps:  160
-------------------------------------------
| time/                   |               |
|    fps                  | 6    

fx_average:  3.2737336960885255
fy_average:  -1.8224162057615254
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.2737336960885255
force_error_y:  -1.8224162057615254
sobolev_term:  0.37789058110117507
REWARD:  -3.474040482951226
--------
observation:  [ 0.166152    2.          0.          1.          0.49072391  1.2
 -1.2         1.2         1.95267495  2.38063371  2.49072391]
timesteps:  172
action_pos:  1.1434802055358886
action_vel:  1.9602169170975685
fx_average:  2.959196518179677
fy_average:  -2.663289007627696
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9591965181796769
force_error_y:  -2.663289007627696
sobolev_term:  0.410064324852749
REWARD:  -4.032549850660122
--------
observation:  [ 0.9117128   2.          0.          0.95290017 -0.03978308 -1.2
  1.2         1.14348021  2.38063371  2.49072391  1.96021692]
timesteps:  173
action_pos:  1.2
action_vel:  2.0403998270630836
fx_average:  1.7840324632198132
fy_average:  -2.12777748895328
target_force_

fx_average:  2.1205786749871725
fy_average:  -0.3469543264943466
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.12057867498717245
force_error_y:  -0.3469543264943466
sobolev_term:  0.3458289063752748
REWARD:  -0.8133619078567939
--------
observation:  [-1.09254681  2.          0.         -0.77568275  1.         -0.84559972
 -1.2        -0.9308193   3.          2.2620303   3.        ]
timesteps:  187
action_pos:  0.6659771203994751
action_vel:  1.942322202026844
fx_average:  2.7474432821301273
fy_average:  -0.5440169407509174
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7474432821301273
force_error_y:  -0.5440169407509174
sobolev_term:  0.4034631754648656
REWARD:  -1.6949233983459104
--------
observation:  [ 0.11960166  2.          0.          0.55498093 -0.0576778  -1.2
 -0.9308193   0.66597712  2.2620303   3.          1.9423222 ]
timesteps:  188
action_pos:  -0.3598669767379761
action_vel:  1.0
fx_average:  3.3424339074657468
fy_average:  -0.554240216210331

fx_average:  2.7262991217367727
fy_average:  -1.158039059153037
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7262991217367727
force_error_y:  -1.158039059153037
sobolev_term:  0.38279696006032227
REWARD:  -2.267135140950132
--------
observation:  [ 0.51587896  2.          0.          1.          0.38779444 -0.92504067
 -0.50523648  1.2         1.          2.2347201   2.38779444]
timesteps:  200
action_pos:  0.7201443672180176
action_vel:  1.1035966873168945
fx_average:  2.2645163678005975
fy_average:  -1.5001138080569334
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2645163678005975
force_error_y:  -1.5001138080569334
sobolev_term:  0.39075088081075715
REWARD:  -2.155381056668288
--------
observation:  [ 0.58036893  2.          0.          0.60012031 -0.89640331 -0.50523648
  1.2         0.72014437  2.2347201   2.38779444  1.10359669]
timesteps:  201
action_pos:  -0.06610350608825684
action_vel:  2.7438220977783203
fx_average:  2.6948618551806103
fy_average

fx_average:  2.6113619659972787
fy_average:  -2.7594260628425977
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6113619659972787
force_error_y:  -2.7594260628425977
sobolev_term:  0.529680252346197
REWARD:  -3.9004682811860736
--------
observation:  [ 0.75497699  2.          0.          1.          0.27018547  1.2
 -0.11373192  1.2         2.31441718  1.          2.27018547]
timesteps:  215
action_pos:  0.9204867839813232
action_vel:  3.0
fx_average:  1.3872404067445243
fy_average:  -2.3341483702921466
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6127595932554757
force_error_y:  -2.3341483702921466
sobolev_term:  0.5043206982519293
REWARD:  -3.4512286617995516
--------
observation:  [ 0.74259907  2.          0.          0.76707232  1.         -0.11373192
  1.2         0.92048678  1.          2.27018547  3.        ]
timesteps:  216
action_pos:  1.2
action_vel:  2.2241465151309967
fx_average:  0.4672186193226192
fy_average:  -1.4624151253058737
target_force_x

fx_average:  1.8303795141615236
fy_average:  0.9032053311730829
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.16962048583847644
force_error_y:  0.9032053311730829
sobolev_term:  0.5188315619455246
REWARD:  -1.5916573789570838
--------
observation:  [-1.00817148  2.          0.         -0.93364275 -1.          1.11744583
 -1.2        -1.12037129  3.          1.          1.        ]
timesteps:  228
action_pos:  -1.2
action_vel:  1.4967231154441833
fx_average:  2.332650352576948
fy_average:  0.6621329371926872
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3326503525769482
force_error_y:  0.6621329371926872
sobolev_term:  0.4742815959537774
REWARD:  -1.4690648857234128
--------
observation:  [-0.95424424  2.          0.         -1.         -0.50327688 -1.2
 -1.12037129 -1.2         1.          1.          1.49672312]
timesteps:  229
action_pos:  1.093507218360901
action_vel:  2.3658863604068756
fx_average:  3.1718858829576906
fy_average:  -0.2316180907507431
ta

fx_average:  3.997162219004575
fy_average:  -1.2912203665621
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.9971622190045748
force_error_y:  -1.2912203665621
sobolev_term:  0.45875253049734604
REWARD:  -3.7471351160640207
--------
observation:  [-0.52681367  2.          0.          0.71567869  1.         -1.1701021
 -1.2         0.85881443  2.51831585  1.01988602  3.        ]
timesteps:  243
action_pos:  -0.11004602909088135
action_vel:  1.0
fx_average:  2.727577817419512
fy_average:  -0.718701200530125
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7275778174195122
force_error_y:  -0.718701200530125
sobolev_term:  0.5245180814602558
REWARD:  -1.970797099409893
--------
observation:  [ 0.08769126  2.          0.         -0.09170502 -1.         -1.2
  0.85881443 -0.11004603  1.01988602  3.          1.        ]
timesteps:  244
action_pos:  0.9754255771636963
action_vel:  2.1025964319705963
fx_average:  2.619425574066313
fy_average:  0.004013522071669892
target_f

action_pos:  0.8613413572311401
action_vel:  1.8838738054037094
fx_average:  2.238781635458665
fy_average:  0.17801146554781033
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.23878163545866515
force_error_y:  0.17801146554781033
sobolev_term:  0.4827172029680431
REWARD:  -0.8995103039745186
--------
observation:  [-0.29141067  2.          0.          0.71778446 -0.11612619 -0.64602799
 -1.2         0.86134136  3.          1.          1.88387381]
timesteps:  7
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.725531735384911
fy_average:  0.10476421217882037
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2744682646150891
force_error_y:  0.10476421217882037
sobolev_term:  0.6084898025671429
REWARD:  -0.9877222793610524
--------
observation:  [-0.23235201  2.          0.         -1.          1.         -1.2
  0.86134136 -1.2         1.          1.88387381  3.        ]
timesteps:  8
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.2508834725517082
fy_average:  0.

fx_average:  3.1333025956532787
fy_average:  -0.28854980168787897
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.1333025956532787
force_error_y:  -0.28854980168787897
sobolev_term:  0.7484492510333317
REWARD:  -2.1703016483744895
--------
observation:  [-0.08075748  2.          0.          1.         -1.          0.25673482
 -1.2         1.2         3.          3.          1.        ]
timesteps:  22
action_pos:  0.20816287994384766
action_vel:  3.0
fx_average:  3.164154342239934
fy_average:  -0.4271071734439953
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.164154342239934
force_error_y:  -0.4271071734439953
sobolev_term:  0.7077666246546117
REWARD:  -2.299028140338541
--------
observation:  [ 0.15773915  2.          0.          0.17346907  1.         -1.2
  1.2         0.20816288  3.          1.          3.        ]
timesteps:  23
action_pos:  0.816238260269165
action_vel:  1.9870432987809181
fx_average:  3.2710621389469603
fy_average:  -2.147229155825204
tar

fx_average:  2.0975951623913733
fy_average:  0.7898723005518483
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.09759516239137334
force_error_y:  0.7898723005518483
sobolev_term:  0.46287183368244955
REWARD:  -1.350339296625671
--------
observation:  [ 0.1219033   2.          0.          0.68343556  0.16173553 -0.6076719
 -1.2         0.82012267  1.39567143  1.25468242  2.16173553]
timesteps:  37
action_pos:  0.24998588562011717
action_vel:  1.0
fx_average:  2.277928162407923
fy_average:  -0.009136797363287883
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.27792816240792284
force_error_y:  -0.009136797363287883
sobolev_term:  0.4236630395254006
REWARD:  -0.7107279992966113
--------
observation:  [ 0.21690084  2.          0.          0.20832157 -1.         -1.2
  0.82012267  0.24998589  1.25468242  2.16173553  1.        ]
timesteps:  38
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 250        |
|   

fx_average:  0.9967595098459081
fy_average:  -0.12933530179397185
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.003240490154092
force_error_y:  -0.12933530179397185
sobolev_term:  0.43158069940474525
REWARD:  -1.564156491352809
--------
observation:  [-1.08389454  2.          0.         -1.         -0.63675129  0.69872546
 -0.72091248 -1.2         1.90545891  2.18002503  1.36324871]
timesteps:  50
action_pos:  -1.2
action_vel:  2.1848116666078568
fx_average:  0.7226722404725212
fy_average:  0.6999493571236591
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2773277595274788
force_error_y:  0.6999493571236591
sobolev_term:  0.4096096336768477
REWARD:  -2.3868867503279856
--------
observation:  [-1.04313962  2.          0.         -1.          0.18481167 -0.72091248
 -1.2        -1.2         2.18002503  1.36324871  2.18481167]
timesteps:  51
action_pos:  -1.2
action_vel:  2.5637460350990295
fx_average:  1.279127512533458
fy_average:  1.848606886433402
target_fo

fx_average:  2.254750264423353
fy_average:  -0.43388184790214873
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.254750264423353
force_error_y:  -0.43388184790214873
sobolev_term:  0.3344481197425778
REWARD:  -1.0230802320680796
--------
observation:  [-1.02332479  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         1.02250487  1.          1.        ]
timesteps:  65
action_pos:  -1.2
action_vel:  1.0
fx_average:  0.9290829227283574
fy_average:  0.5019669478767804
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0709170772716425
force_error_y:  0.5019669478767804
sobolev_term:  0.26513266048413686
REWARD:  -1.83801668563256
--------
observation:  [-1.03047676  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         1.          1.          1.        ]
timesteps:  66
action_pos:  -1.2
action_vel:  1.1176984310150146
fx_average:  1.1068453407309686
fy_average:  2.3995376767275074
target_force_x:  2.0
target_force_y:  0

fx_average:  1.7015124244207942
fy_average:  1.924311202884464
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2984875755792058
force_error_y:  1.924311202884464
sobolev_term:  0.5171028483255015
REWARD:  -2.739901626789171
--------
observation:  [-0.66534075  2.          0.         -0.65816224 -1.         -0.86048949
 -1.2        -0.78979468  1.          1.39443225  1.        ]
timesteps:  78
action_pos:  -1.2
action_vel:  1.9403706640005112
fx_average:  0.11800555560299708
fy_average:  2.434519108707758
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.881994444397003
force_error_y:  2.434519108707758
sobolev_term:  0.5208600365598496
REWARD:  -4.83737358966461
--------
observation:  [-1.07855348  2.          0.         -1.         -0.05962934 -1.2
 -0.78979468 -1.2         1.39443225  1.          1.94037066]
timesteps:  79
action_pos:  -0.42098361253738403
action_vel:  1.9408888593316078
fx_average:  1.481560679898985
fy_average:  1.5342561636109258
target_fo

fx_average:  2.758173622257918
fy_average:  0.2052172319647957
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7581736222579178
force_error_y:  0.2052172319647957
sobolev_term:  0.5326875967081268
REWARD:  -1.4960784509308405
--------
observation:  [-0.81654727  2.          0.         -0.56222582 -1.         -1.18613791
 -1.2        -0.67467098  3.          1.          1.        ]
timesteps:  93
action_pos:  -0.13893170356750487
action_vel:  3.0
fx_average:  2.0245295635218894
fy_average:  0.22314865023088093
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.024529563521889397
force_error_y:  0.22314865023088093
sobolev_term:  0.5743407953953102
REWARD:  -0.8220190091480806
--------
observation:  [-0.44709426  2.          0.         -0.11577642  1.         -1.2
 -0.67467098 -0.1389317   1.          1.          3.        ]
timesteps:  94
action_pos:  -1.2
action_vel:  1.6244007647037506
fx_average:  1.1849167158430023
fy_average:  1.1353471053462787
target_force_x:

fx_average:  2.6918742098581108
fy_average:  1.0230803528332522
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6918742098581108
force_error_y:  1.0230803528332522
sobolev_term:  0.5629574880884055
REWARD:  -2.2779120507797685
--------
observation:  [-0.92335944  2.          0.         -1.         -1.          0.68984349
 -1.2        -1.2         1.83529398  1.50037912  1.        ]
timesteps:  106
action_pos:  -1.2
action_vel:  2.0991867035627365
fx_average:  1.5673872059103156
fy_average:  1.7143728092187127
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.43261279408968445
force_error_y:  1.7143728092187127
sobolev_term:  0.47218524439637727
REWARD:  -2.6191708477047744
--------
observation:  [-1.12134359  2.          0.         -1.          0.0991867  -1.2
 -1.2        -1.2         1.50037912  1.          2.0991867 ]
timesteps:  107
action_pos:  1.2
action_vel:  1.095971703529358
fx_average:  2.784429874336766
fy_average:  0.9842690038402058
target_force_x:  

fx_average:  2.286898506583372
fy_average:  -0.1478275640763175
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.286898506583372
force_error_y:  -0.1478275640763175
sobolev_term:  0.4216182390173862
REWARD:  -0.8563443096770758
--------
observation:  [-0.70066493  2.          0.         -0.55634058  0.21858197 -0.97015979
 -1.2        -0.66760869  2.33149913  2.94353652  2.21858197]
timesteps:  121
action_pos:  0.3665459632873535
action_vel:  3.0
fx_average:  1.823561463273595
fy_average:  -0.028510464197553917
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.17643853672640497
force_error_y:  -0.028510464197553917
sobolev_term:  0.4393198661986193
REWARD:  -0.6442688671225782
--------
observation:  [ 0.0031644   2.          0.          0.30545497  1.         -1.2
 -0.66760869  0.36654596  2.94353652  2.21858197  3.        ]
timesteps:  122
action_pos:  1.2
action_vel:  2.2125356048345566
fx_average:  0.9631410900191865
fy_average:  -2.3096003558436204
target_forc

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4066662527991591
fy_average:  0.1606106486055603
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5933337472008409
force_error_y:  0.1606106486055603
sobolev_term:  0.3726695567528719
REWARD:  -1.126613952559273
--------
observation:  [-0.56416991  2.          0.         -1.          1.         -1.2
 -0.13595982 -1.2         1.9635458   3.          3.        ]
timesteps:  135
action_pos:  -1.2
action_vel:  2.1428245902061462
fx_average:  1.262891920022414
fy_average:  0.4182284662426309
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.737108079977586
force_error_y:  0.4182284662426309
sobolev_term:  0.405048201054326
REWARD:  -1.5603847472745427
--------
observation:  [-1.181575    2.          0.         -1.          0.14282459 -0.13595982
 -1.2        -1.2         3.          3.          2.14282459]
timesteps:  136
action_pos:  0.7710880994796753
action_vel:  3.0
fx_average:  3.0935404041515064
fy_average:  -0.4

fx_average:  1.8725648476287846
fy_average:  1.0761652623171787
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.1274351523712154
force_error_y:  1.0761652623171787
sobolev_term:  0.5813143422869348
REWARD:  -1.7849147569753288
--------
observation:  [-0.60660969  2.          0.         -1.         -1.         -1.2
 -0.30089128 -1.2         3.          2.61776996  1.        ]
timesteps:  150
action_pos:  -1.2
action_vel:  2.3836080729961395
fx_average:  0.29295427843660415
fy_average:  2.1966162629319443
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7070457215633958
force_error_y:  2.1966162629319443
sobolev_term:  0.6030686717141079
REWARD:  -4.506730656209448
--------
observation:  [-0.81433913  2.          0.         -1.          0.38360807 -0.30089128
 -1.2        -1.2         2.61776996  1.          2.38360807]
timesteps:  151
action_pos:  -1.2
action_vel:  1.3632067441940308
fx_average:  -0.10991011342278834
fy_average:  1.5820572785946836
target_force_

action_pos:  -1.2
action_vel:  1.3492820858955383
fx_average:  1.4379903178995372
fy_average:  1.047897032827733
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5620096821004628
force_error_y:  1.047897032827733
sobolev_term:  0.5285901941209691
REWARD:  -2.138496909049165
--------
observation:  [-0.66143739  2.          0.         -1.         -0.65071791 -1.2
  0.50993435 -1.2         1.          2.70286429  1.34928209]
timesteps:  165
action_pos:  -1.2
action_vel:  1.2277405858039856
fx_average:  1.193305383404948
fy_average:  1.2996481132506472
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8066946165950519
force_error_y:  1.2996481132506472
sobolev_term:  0.5851372255657187
REWARD:  -2.6914799554114177
--------
observation:  [-1.0808697   2.          0.         -1.         -0.77225941  0.50993435
 -1.2        -1.2         2.70286429  1.34928209  1.22774059]
timesteps:  166
-----------------------------------------
| rollout/                |             |

fx_average:  2.3473537724179185
fy_average:  0.15193342669879645
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.34735377241791854
force_error_y:  0.15193342669879645
sobolev_term:  0.5923157372646958
REWARD:  -1.0916029363814108
--------
observation:  [-0.59397532  2.          0.         -0.46944335  0.39050382  0.0326108
 -1.2        -0.56333202  2.65529597  2.70822871  2.39050382]
timesteps:  178
action_pos:  -1.2
action_vel:  1.3607369661331177
fx_average:  1.9707351991790916
fy_average:  0.5528594422205722
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.029264800820908432
force_error_y:  0.5528594422205722
sobolev_term:  0.5609277986427448
REWARD:  -1.1430520416842254
--------
observation:  [-0.80832671  2.          0.         -1.         -0.63926303 -1.2
 -0.56333202 -1.2         2.70822871  2.39050382  1.36073697]
timesteps:  179
action_pos:  -1.2
action_vel:  1.158855140209198
fx_average:  1.1934979794051446
fy_average:  0.6878151801501392
target_force_

fx_average:  3.135294582004858
fy_average:  0.19769789028808613
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.135294582004858
force_error_y:  0.19769789028808613
sobolev_term:  0.4354005928339038
REWARD:  -1.768393065126848
--------
observation:  [-0.78489505  2.          0.         -0.76028037 -0.19809504  0.99767354
 -0.25378246 -0.91233644  2.53759974  1.9893785   1.80190496]
timesteps:  193
action_pos:  0.4842156887054443
action_vel:  1.0
fx_average:  3.391512675906909
fy_average:  -0.8833239543696584
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3915126759069092
force_error_y:  -0.8833239543696584
sobolev_term:  0.4153694358450188
REWARD:  -2.6902060661215863
--------
observation:  [ 0.33816915  2.          0.          0.40351307 -1.         -0.25378246
 -0.91233644  0.48421569  1.9893785   1.80190496  1.        ]
timesteps:  194
action_pos:  -1.2
action_vel:  2.437097817659378
fx_average:  4.158958956704216
fy_average:  -0.4835997688899507
target_force

action_pos:  -1.2
action_vel:  1.295695424079895
fx_average:  1.2590361914754868
fy_average:  0.9185685739098584
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7409638085245132
force_error_y:  0.9185685739098584
sobolev_term:  0.5908110019763937
REWARD:  -2.2503433844107654
--------
observation:  [-1.07102457  2.          0.         -1.         -0.70430458  1.2
 -1.2        -1.2         1.8394102   1.06831384  1.29569542]
timesteps:  206
action_pos:  -0.19463717937469482
action_vel:  1.3137966990470886
fx_average:  2.416777533857228
fy_average:  0.4033590849475758
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.41677753385722793
force_error_y:  0.4033590849475758
sobolev_term:  0.5990325841683454
REWARD:  -1.419169202973149
--------
observation:  [-0.192018    2.          0.         -0.16219765 -0.6862033  -1.2
 -1.2        -0.19463718  1.06831384  1.29569542  1.3137967 ]
timesteps:  207
action_pos:  0.742905592918396
action_vel:  1.9647922962903976
fx_average

fx_average:  1.3025928232248627
fy_average:  0.16954713539005528
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6974071767751373
force_error_y:  0.16954713539005528
sobolev_term:  0.24405680116487313
REWARD:  -1.1110111133300657
--------
observation:  [-1.07846413  2.          0.         -0.87769186  0.71651417 -1.2
 -1.2        -1.05323024  2.73631471  3.          2.71651417]
timesteps:  221
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6853338973912901
fy_average:  0.09225640928548652
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.31466610260870986
force_error_y:  0.09225640928548652
sobolev_term:  0.24934820318191517
REWARD:  -0.6562707150761116
--------
observation:  [-1.12048201  2.          0.         -1.          1.         -1.2
 -1.05323024 -1.2         3.          2.71651417  3.        ]
timesteps:  222
action_pos:  -1.2
action_vel:  2.630996286869049
fx_average:  1.7107137494182705
fy_average:  0.07772858060086359
target_force_x:  2.0
target_fo

fx_average:  1.1704407050095123
fy_average:  0.193487223704655
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8295592949904877
force_error_y:  0.193487223704655
sobolev_term:  0.4447916717681489
REWARD:  -1.4678381904632916
--------
observation:  [-0.28127777  2.          0.         -1.          0.3302083  -0.77903724
 -0.34882228 -1.2         2.55302143  3.          2.3302083 ]
timesteps:  234
action_pos:  -1.2
action_vel:  2.476205289363861
fx_average:  1.0132148455402286
fy_average:  1.5015636330387183
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9867851544597714
force_error_y:  1.5015636330387183
sobolev_term:  0.5130576999541896
REWARD:  -3.0014064874526794
--------
observation:  [-1.03525519  2.          0.         -1.          0.47620529 -0.34882228
 -1.2        -1.2         3.          2.3302083   2.47620529]
timesteps:  235
action_pos:  -0.2837335109710693
action_vel:  2.1548896729946136
fx_average:  2.3866282406502446
fy_average:  0.7484249644255

fx_average:  4.203164036415065
fy_average:  -0.46625242997092203
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  2.203164036415065
force_error_y:  -0.46625242997092203
sobolev_term:  0.4798759283625498
REWARD:  -3.1492923947485365
--------
observation:  [-0.32317502  2.          0.         -0.26529604  0.01155829  0.78601756
 -1.2        -0.31835525  1.93325616  2.66687363  2.01155829]
timesteps:  249
action_pos:  -1.2
action_vel:  1.0914331078529358
fx_average:  2.313702304603382
fy_average:  0.42460355228106056
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3137023046033822
force_error_y:  0.42460355228106056
sobolev_term:  0.4665964869659728
REWARD:  -1.2049023438504154
--------
observation:  [-0.9697365   2.          0.         -1.         -0.90856689 -1.2
 -0.31835525 -1.2         2.66687363  2.01155829  1.09143311]
timesteps:  250
-----------RESET!!!!!!!!!!!!!-----------
action_pos:  -1.2
action_vel:  1.428144097328186
fx_average:  0.19751217004762342
fy_av

action_pos:  -1.2
action_vel:  2.8078144788742065
fx_average:  1.0341341626447396
fy_average:  0.9482963377931725
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9658658373552604
force_error_y:  0.9482963377931725
sobolev_term:  0.5044732814335406
REWARD:  -2.418635456581973
--------
observation:  [-1.25699804  2.          0.         -1.          0.80781448 -1.2
 -1.2        -1.2         1.          1.28606969  2.80781448]
timesteps:  13
action_pos:  -1.1142035722732544
action_vel:  1.9769426956772804
fx_average:  0.2500704672351015
fy_average:  0.9940158087189859
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7499295327648985
force_error_y:  0.9940158087189859
sobolev_term:  0.4696003856096577
REWARD:  -3.213545727093542
--------
observation:  [-1.14893699  2.          0.         -0.92850298 -0.0230573  -1.2
 -1.2        -1.11420357  1.28606969  2.80781448  1.9769427 ]
timesteps:  14
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.57823740398988
fy_average

fx_average:  2.2782818347115215
fy_average:  0.6260719513800306
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.27828183471152146
force_error_y:  0.6260719513800306
sobolev_term:  0.5164597997726733
REWARD:  -1.4208135858642255
--------
observation:  [-0.94716487  2.          0.         -1.          0.13635676 -1.2
 -0.94876649 -1.2         1.45391196  1.          2.13635676]
timesteps:  28
action_pos:  -1.2
action_vel:  1.0
fx_average:  0.49378604746707455
fy_average:  1.0831826398213766
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5062139525329254
force_error_y:  1.0831826398213766
sobolev_term:  0.47883106983386814
REWARD:  -3.0682276621881703
--------
observation:  [-0.74394227  2.          0.         -1.         -1.         -0.94876649
 -1.2        -1.2         1.          2.13635676  1.        ]
timesteps:  29
action_pos:  -1.2
action_vel:  2.5904303789138794
fx_average:  0.46913679513615564
fy_average:  0.5868912866697993
target_force_x:  2.0
target_f

fx_average:  0.29301337351646856
fy_average:  1.5907846699095962
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7069866264835314
force_error_y:  1.5907846699095962
sobolev_term:  0.3715428585151056
REWARD:  -3.6693141549082333
--------
observation:  [-0.98659258  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         1.          3.          1.        ]
timesteps:  43
action_pos:  -1.2
action_vel:  2.046109028160572
fx_average:  -0.06276312223710825
fy_average:  2.504275405550782
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -2.0627631222371083
force_error_y:  2.504275405550782
sobolev_term:  0.48060797668796784
REWARD:  -5.047646504475858
--------
observation:  [-1.02661693  2.          0.         -1.          0.04610903 -1.2
 -1.2        -1.2         3.          1.          2.04610903]
timesteps:  44
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_rew_

fx_average:  0.7379538765140705
fy_average:  0.691179306431637
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2620461234859295
force_error_y:  0.691179306431637
sobolev_term:  0.31711613359133867
REWARD:  -2.2703415635089055
--------
observation:  [-0.94635665  2.          0.         -0.87278438  1.         -0.07257814
 -0.70276766 -1.04734125  1.          1.          3.        ]
timesteps:  56
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.5585827390009698
fy_average:  0.7235521692690776
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4414172609990303
force_error_y:  0.7235521692690776
sobolev_term:  0.24507740843990955
REWARD:  -2.4100468387080176
--------
observation:  [-1.38678207  2.          0.         -1.          1.         -0.70276766
 -1.04734125 -1.2         1.          3.          3.        ]
timesteps:  57
action_pos:  1.2
action_vel:  1.0
fx_average:  2.198149428610562
fy_average:  0.010756629641116244
target_force_x:  2.0
target_force_y:  0.

fx_average:  2.742074482810224
fy_average:  0.21455690463143506
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7420744828102239
force_error_y:  0.21455690463143506
sobolev_term:  0.3551457416573688
REWARD:  -1.3117771290990277
--------
observation:  [-0.96344805  2.          0.         -1.         -0.13103291 -1.2
 -0.37433438 -1.2         1.64374423  1.8735498   1.86896709]
timesteps:  71
action_pos:  -0.6779578685760498
action_vel:  1.0
fx_average:  2.679155483043694
fy_average:  0.24446747512443162
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6791554830436941
force_error_y:  0.24446747512443162
sobolev_term:  0.4020020873514604
REWARD:  -1.3256250455195862
--------
observation:  [-0.61788406  2.          0.         -0.56496489 -1.         -0.37433438
 -1.2        -0.67795787  1.8735498   1.86896709  1.        ]
timesteps:  72
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.274770960333447
fy_average:  0.452034679649439
target_force_x:  2.0
target_force_

fx_average:  3.745103705058325
fy_average:  -0.5037985563072529
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.745103705058325
force_error_y:  -0.5037985563072529
sobolev_term:  0.3612291065526177
REWARD:  -2.6101313679181954
--------
observation:  [-0.14225552  2.          0.         -0.12210917  0.17528036 -1.2
 -1.2        -0.14653101  2.08491053  1.          2.17528036]
timesteps:  84
action_pos:  0.238700795173645
action_vel:  2.45427542924881
fx_average:  2.9709248288620698
fy_average:  -0.817317728879759
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9709248288620698
force_error_y:  -0.817317728879759
sobolev_term:  0.37226861319804716
REWARD:  -2.1605111709398757
--------
observation:  [ 0.15651521  2.          0.          0.19891733  0.45427543 -1.2
 -0.14653101  0.2387008   1.          2.17528036  2.45427543]
timesteps:  85
action_pos:  -0.505894911289215
action_vel:  1.3925638794898987
fx_average:  2.5931584638104925
fy_average:  1.6385749137295424


fx_average:  2.5918676706077224
fy_average:  -0.379863346277984
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5918676706077224
force_error_y:  -0.379863346277984
sobolev_term:  0.47976960445654554
REWARD:  -1.4515006213422519
--------
observation:  [-0.36845065  2.          0.         -0.34901237 -1.          0.26717477
 -0.79271171 -0.41881485  1.26566678  1.09547639  1.        ]
timesteps:  99
action_pos:  -1.2
action_vel:  1.5640276074409485
fx_average:  1.8109952035058792
fy_average:  0.6334740472924455
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.18900479649412083
force_error_y:  0.6334740472924455
sobolev_term:  0.4921227223290968
REWARD:  -1.3146015661156631
--------
observation:  [-1.09278958  2.          0.         -1.         -0.43597239 -0.79271171
 -0.41881485 -1.2         1.09547639  1.          1.56402761]
timesteps:  100
action_pos:  0.29545140266418457
action_vel:  1.206025779247284
fx_average:  2.4999909885792295
fy_average:  0.22990677411

fx_average:  2.15273632710306
fy_average:  -0.24225518317520045
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.15273632710306018
force_error_y:  -0.24225518317520045
sobolev_term:  0.4731826962687274
REWARD:  -0.868174206546988
--------
observation:  [-0.68644553  2.          0.         -0.55584782  0.97535676  1.10200775
 -1.2        -0.66701739  1.          1.          2.97535676]
timesteps:  112
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.935714469643367
fy_average:  -0.01692655267509432
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.06428553035663298
force_error_y:  -0.01692655267509432
sobolev_term:  0.519450246392548
REWARD:  -0.6006623294242753
--------
observation:  [-1.32474498  2.          0.         -1.          1.         -1.2
 -0.66701739 -1.2         1.          2.97535676  3.        ]
timesteps:  113
action_pos:  -1.0281514406204224
action_vel:  1.3166072368621826
fx_average:  1.6175996077141659
fy_average:  0.4712493904121571
target_forc

fx_average:  4.033700036734317
fy_average:  -0.11553873706072115
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  2.0337000367343174
force_error_y:  -0.11553873706072115
sobolev_term:  0.6521065086226893
REWARD:  -2.801345282417728
--------
observation:  [ 0.12574911  2.          0.         -1.          0.1926803  -1.2
  1.05419605 -1.2         2.80800724  1.30190605  2.1926803 ]
timesteps:  127
action_pos:  -0.1380748987197876
action_vel:  2.7263020277023315
fx_average:  3.098159836083122
fy_average:  0.8110276753592734
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.0981598360831222
force_error_y:  0.8110276753592734
sobolev_term:  0.6404983627738489
REWARD:  -2.5496858742162445
--------
observation:  [-0.17391495  2.          0.         -0.11506242  0.72630203  1.05419605
 -1.2        -0.1380749   1.30190605  2.1926803   2.72630203]
timesteps:  128
action_pos:  -1.2
action_vel:  1.406093180179596
fx_average:  2.779869008327304
fy_average:  0.5150134398196836
tar

action_pos:  -0.09986389875411987
action_vel:  3.0
fx_average:  1.8927400053708971
fy_average:  -0.5768991352438662
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.10725999462910285
force_error_y:  -0.5768991352438662
sobolev_term:  0.27640598191321797
REWARD:  -0.960565111786187
--------
observation:  [-0.1662824   2.          0.         -0.08321992  1.          0.41575434
  0.05793278 -0.0998639   1.          1.6658009   3.        ]
timesteps:  141
action_pos:  -0.2462035059928894
action_vel:  1.7966624349355698
fx_average:  2.291626405401782
fy_average:  -1.3002401314130596
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.29162640540178186
force_error_y:  -1.3002401314130596
sobolev_term:  0.24055284324812803
REWARD:  -1.8324193800629696
--------
observation:  [-0.17398421  2.          0.         -0.20516959 -0.20333757  0.05793278
 -0.0998639  -0.24620351  1.6658009   3.          1.79666243]
timesteps:  142
action_pos:  -0.8889510154724121
action_vel:  1.0
f

fx_average:  2.8387475721741606
fy_average:  -0.3047672147980825
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8387475721741606
force_error_y:  -0.3047672147980825
sobolev_term:  0.5048883166948214
REWARD:  -1.6484031036670643
--------
observation:  [-0.16513224  2.          0.         -0.17755795  1.          0.17112265
 -1.00280671 -0.21306953  2.96397471  2.64618468  3.        ]
timesteps:  155
action_pos:  -0.9911654233932494
action_vel:  2.3408282995224
fx_average:  1.9105164389908282
fy_average:  0.3926178799349808
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.08948356100917176
force_error_y:  0.3926178799349808
sobolev_term:  0.521659860686569
REWARD:  -1.0037613016307216
--------
observation:  [-0.02839389  2.          0.         -0.82597119  0.3408283  -1.00280671
 -0.21306953 -0.99116542  2.64618468  3.          2.3408283 ]
timesteps:  156
action_pos:  -1.2
action_vel:  1.261542022228241
fx_average:  1.514793733660278
fy_average:  2.65333278353041

fx_average:  1.5275869359411762
fy_average:  -0.780619590833364
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4724130640588238
force_error_y:  -0.780619590833364
sobolev_term:  0.33440394719600314
REWARD:  -1.587436602088191
--------
observation:  [-0.54565813  2.          0.         -1.          0.03599743 -0.15689238
  0.1132443  -1.2         2.87653828  1.          2.03599743]
timesteps:  170
action_pos:  -0.33832554817199706
action_vel:  2.018527489155531
fx_average:  2.79511033786902
fy_average:  -1.7885618036140958
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7951103378690201
force_error_y:  -1.7885618036140958
sobolev_term:  0.38533644883704615
REWARD:  -2.969008590320162
--------
observation:  [-0.35123716  2.          0.         -0.28193796  0.01852749  0.1132443
 -1.2        -0.33832555  1.          2.03599743  2.01852749]
timesteps:  171
action_pos:  -1.2
action_vel:  2.1214445903897285
fx_average:  1.5186225656777776
fy_average:  -0.03277352093

fx_average:  2.9279258666763748
fy_average:  0.5381439790491702
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9279258666763748
force_error_y:  0.5381439790491702
sobolev_term:  0.6485468897277654
REWARD:  -2.1146167354533105
--------
observation:  [-1.27411612  2.          0.         -1.         -1.          0.97342057
 -1.2        -1.2         3.          3.          1.        ]
timesteps:  183
action_pos:  -0.8435503005981445
action_vel:  2.9312368035316467
fx_average:  3.00467066081415
fy_average:  0.5569041716296762
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.00467066081415
force_error_y:  0.5569041716296762
sobolev_term:  0.6240169421740039
REWARD:  -2.18559177461783
--------
observation:  [-0.16853591  2.          0.         -0.70295858  0.9312368  -1.2
 -1.2        -0.8435503   3.          1.          2.9312368 ]
timesteps:  184
action_pos:  -1.2
action_vel:  2.3554546236991882
fx_average:  2.0114842141105345
fy_average:  1.494133260096348
target_fo

fx_average:  2.8377692083052586
fy_average:  -2.392829269492613
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8377692083052586
force_error_y:  -2.392829269492613
sobolev_term:  0.6472390278629094
REWARD:  -3.8778375056607812
--------
observation:  [ 0.00825132  2.          0.          0.14690745 -0.27501971  1.2
 -1.2         0.17628894  1.45820123  3.          1.72498029]
timesteps:  198
action_pos:  0.5473425865173339
action_vel:  1.2003746628761292
fx_average:  2.211595527955265
fy_average:  -1.5528769978645445
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.21159552795526482
force_error_y:  -1.5528769978645445
sobolev_term:  0.5210154314295902
REWARD:  -2.2854879572493996
--------
observation:  [ 0.40836461  2.          0.          0.45611882 -0.79962534 -1.2
  0.17628894  0.54734259  3.          1.72498029  1.20037466]
timesteps:  199
action_pos:  0.47170808315277096
action_vel:  1.0
fx_average:  1.4102571379118738
fy_average:  -1.900364130856013
target_f

fx_average:  2.907725389434065
fy_average:  -1.8266716324300494
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9077253894340651
force_error_y:  -1.8266716324300494
sobolev_term:  0.328947053661335
REWARD:  -3.0633440755254493
--------
observation:  [ 0.84348176  2.          0.          0.81036276 -1.         -0.54926659
  1.2         0.97243531  2.15583649  2.67509645  1.        ]
timesteps:  211
action_pos:  -0.9287339687347411
action_vel:  1.4693863987922668
fx_average:  3.370898400816122
fy_average:  -0.8937059458197075
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.370898400816122
force_error_y:  -0.8937059458197075
sobolev_term:  0.45405763283516487
REWARD:  -2.7186619794709945
--------
observation:  [-0.28707234  2.          0.         -0.77394497 -0.5306136   1.2
  0.97243531 -0.92873397  2.67509645  1.          1.4693864 ]
timesteps:  212
action_pos:  1.2
action_vel:  2.165737807750702
fx_average:  2.2781565983991183
fy_average:  -0.25903290762214
targ

fx_average:  0.23482468510975404
fy_average:  0.2639167374359678
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.765175314890246
force_error_y:  0.2639167374359678
sobolev_term:  0.3040226403822506
REWARD:  -2.3331146927084645
--------
observation:  [-1.26761792  2.          0.         -1.         -0.45497254 -0.74351878
 -1.2        -1.2         3.          1.25784802  1.54502746]
timesteps:  226
action_pos:  -0.7018637895584107
action_vel:  1.4234054684638977
fx_average:  1.8497805768362734
fy_average:  0.13038194089361219
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.1502194231637266
force_error_y:  0.13038194089361219
sobolev_term:  0.35207592001803195
REWARD:  -0.6326772840753707
--------
observation:  [-0.62013796  2.          0.         -0.58488649 -0.57659453 -1.2
 -1.2        -0.70186379  1.25784802  1.54502746  1.42340547]
timesteps:  227
action_pos:  -0.4702824354171753
action_vel:  2.501785635948181
fx_average:  1.3733324049378781
fy_average:  0.

fx_average:  1.8975501185873085
fy_average:  0.5315138933204675
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.10244988141269151
force_error_y:  0.5315138933204675
sobolev_term:  0.3005301793230313
REWARD:  -0.9344939540561903
--------
observation:  [-0.64436477  2.          0.         -0.76474631 -1.         -0.09028916
 -1.2        -0.91769557  1.39623612  2.65468895  1.        ]
timesteps:  239
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.111283048515565
fy_average:  -0.18597588836605755
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.11128304851556514
force_error_y:  -0.18597588836605755
sobolev_term:  0.3159594764367739
REWARD:  -0.6132184133183967
--------
observation:  [-0.87604457  2.          0.         -1.          1.         -1.2
 -0.91769557 -1.2         2.65468895  1.          3.        ]
timesteps:  240
action_pos:  0.8198822736740112
action_vel:  1.9053733870387077
fx_average:  1.4335466324725927
fy_average:  0.44282400545800593
target_forc

fx_average:  1.7774292573250368
fy_average:  -0.07266640818181713
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.22257074267496324
force_error_y:  -0.07266640818181713
sobolev_term:  0.3393275742405096
REWARD:  -0.6345647250972899
--------
observation:  [ 0.12035667  2.          0.          0.229305    1.         -0.62580743
 -1.2         0.275166    1.11837786  2.19912067  3.        ]
timesteps:  4
action_pos:  -0.5105848431587219
action_vel:  1.0
fx_average:  1.8468907943585486
fy_average:  0.3145658197447704
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.15310920564145136
force_error_y:  0.3145658197447704
sobolev_term:  0.4198953818213114
REWARD:  -0.8875704072075332
--------
observation:  [-0.32962005  2.          0.         -0.42548737 -1.         -1.2
  0.275166   -0.51058484  2.19912067  3.          1.        ]
timesteps:  5
action_pos:  -1.2
action_vel:  1.7295652627944946
fx_average:  1.6044686774870052
fy_average:  0.4944756506516425
target_force_

action_pos:  -1.0978766441345214
action_vel:  1.9763983450829983
fx_average:  1.4615629988680072
fy_average:  -0.08839232314678114
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5384370011319928
force_error_y:  -0.08839232314678114
sobolev_term:  0.30860432484181766
REWARD:  -0.9354336491205916
--------
observation:  [-0.99965201  2.          0.         -0.9148972  -0.02360165 -1.2
 -1.2        -1.09787664  2.38030469  2.44002005  1.97639835]
timesteps:  19
action_pos:  -1.2
action_vel:  1.6446951925754547
fx_average:  2.1323768727816885
fy_average:  1.8680789564316092
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.13237687278168853
force_error_y:  1.8680789564316092
sobolev_term:  0.2831536875918398
REWARD:  -2.2836095168051376
--------
observation:  [-1.28929371  2.          0.         -1.         -0.35530481 -1.2
 -1.09787664 -1.2         2.44002005  1.97639835  1.64469519]
timesteps:  20
action_pos:  -0.47002412080764766
action_vel:  1.0
fx_average:  1.95

fx_average:  1.4565067228770912
fy_average:  1.7894149149908125
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5434932771229088
force_error_y:  1.7894149149908125
sobolev_term:  0.5662415207586129
REWARD:  -2.899149712872334
--------
observation:  [-0.87462772  2.          0.         -1.         -0.4920623  -1.2
  0.11088574 -1.2         1.          2.05588873  1.5079377 ]
timesteps:  34
action_pos:  -1.2
action_vel:  2.732646882534027
fx_average:  0.9720378986793498
fy_average:  1.8546808509875412
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0279621013206501
force_error_y:  1.8546808509875412
sobolev_term:  0.5558128799739378
REWARD:  -3.438455832282129
--------
observation:  [-1.10807201  2.          0.         -1.          0.73264688  0.11088574
 -1.2        -1.2         2.05588873  1.5079377   2.73264688]
timesteps:  35
action_pos:  -0.44226815700531
action_vel:  3.0
fx_average:  2.336674099140929
fy_average:  0.013772998721103893
target_force_x:  2.0


fx_average:  2.520914901000702
fy_average:  -0.9536580584459053
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5209149010007019
force_error_y:  -0.9536580584459053
sobolev_term:  0.6019847036867044
REWARD:  -2.0765576631333116
--------
observation:  [-0.60047243  2.          0.         -0.52781773  0.30646738  0.28886304
 -0.9681982  -0.63338127  3.          1.49657333  2.30646738]
timesteps:  49
action_pos:  -1.2
action_vel:  2.1658263951539993
fx_average:  1.0526519044417735
fy_average:  0.1738513434989532
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9473480955582265
force_error_y:  0.1738513434989532
sobolev_term:  0.6179880390273346
REWARD:  -1.7391874780845145
--------
observation:  [-0.92740132  2.          0.         -1.          0.1658264  -0.9681982
 -0.63338127 -1.2         1.49657333  2.30646738  2.1658264 ]
timesteps:  50
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |

fx_average:  0.4521029090363862
fy_average:  0.9525927507749481
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5478970909636138
force_error_y:  0.9525927507749481
sobolev_term:  0.4524275461201992
REWARD:  -2.9529173878587613
--------
observation:  [-1.06349934  2.          0.         -1.         -0.67175275 -0.86099675
 -0.24914918 -1.2         1.84272081  1.57259697  1.32824725]
timesteps:  62
action_pos:  -1.2
action_vel:  2.5632729530334473
fx_average:  0.4933733949665358
fy_average:  0.9831436695788288
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5066266050334642
force_error_y:  0.9831436695788288
sobolev_term:  0.4558276087226267
REWARD:  -2.9455978833349197
--------
observation:  [-1.10306278  2.          0.         -1.          0.56327295 -0.24914918
 -1.2        -1.2         1.57259697  1.32824725  2.56327295]
timesteps:  63
action_pos:  -0.7765776872634887
action_vel:  1.8765091225504875
fx_average:  1.4331049354800773
fy_average:  0.542289497426

fx_average:  0.12406528519716373
fy_average:  0.4178402253972789
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.8759347148028362
force_error_y:  0.4178402253972789
sobolev_term:  0.27357203393731594
REWARD:  -2.567346974137431
--------
observation:  [-1.24230261  2.          0.         -1.          0.47221208 -1.2
 -1.19712739 -1.2         2.10231339  3.          2.47221208]
timesteps:  77
action_pos:  -1.2
action_vel:  1.1087943315505981
fx_average:  0.8833925947999521
fy_average:  -0.05800913087027703
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1166074052000479
force_error_y:  -0.05800913087027703
sobolev_term:  0.140412014319604
REWARD:  -1.3150285503899288
--------
observation:  [-1.48564735  2.          0.         -1.         -0.89120567 -1.19712739
 -1.2        -1.2         3.          2.47221208  1.10879433]
timesteps:  78
action_pos:  -0.7681382417678833
action_vel:  1.4172177910804749
fx_average:  1.9429934447477337
fy_average:  0.058395757531807

fx_average:  2.1847536967247967
fy_average:  -0.17706466259505693
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.1847536967247967
force_error_y:  -0.17706466259505693
sobolev_term:  0.5903239147849493
REWARD:  -0.9521422741048029
--------
observation:  [-0.14406898  2.          0.          0.20191252  0.4180932  -0.11043799
 -1.2         0.24229503  3.          2.64542705  2.4180932 ]
timesteps:  90
action_pos:  0.11757051944732666
action_vel:  1.0
fx_average:  2.809805952633757
fy_average:  -1.144750213031665
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8098059526337571
force_error_y:  -1.144750213031665
sobolev_term:  0.5088004174599472
REWARD:  -2.4633565831253694
--------
observation:  [ 0.09294877  2.          0.          0.09797543 -1.         -1.2
  0.24229503  0.11757052  2.64542705  2.4180932   1.        ]
timesteps:  91
action_pos:  0.6570664644241333
action_vel:  1.0
fx_average:  2.1427560018854948
fy_average:  -1.7178087283357186
target_force_x: 

action_pos:  -1.1255202770233155
action_vel:  1.0
fx_average:  1.8942217548812206
fy_average:  0.25151244317722654
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.10577824511877942
force_error_y:  0.25151244317722654
sobolev_term:  0.31992004058042345
REWARD:  -0.6772107288764294
--------
observation:  [-0.93855714  2.          0.         -0.93793356 -1.         -1.2
 -1.2        -1.12552028  1.31038743  1.62046075  1.        ]
timesteps:  105
action_pos:  -0.4977417469024658
action_vel:  1.0
fx_average:  1.4881004878261979
fy_average:  1.2570843558668439
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5118995121738021
force_error_y:  1.2570843558668439
sobolev_term:  0.31012134368486793
REWARD:  -2.0791052117255138
--------
observation:  [-0.43526622  2.          0.         -0.41478479 -1.         -1.2
 -1.12552028 -0.49774175  1.62046075  1.          1.        ]
timesteps:  106
action_pos:  0.5216936588287353
action_vel:  2.3195332288742065
fx_average:  1.15

fx_average:  2.2297923425633517
fy_average:  -0.5123891315122512
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.22979234256335168
force_error_y:  -0.5123891315122512
sobolev_term:  0.3367840404480589
REWARD:  -1.0789655145236616
--------
observation:  [ 0.125517    2.          0.          0.75018632  1.         -1.0852355
 -1.2         0.90022359  1.          1.42923903  3.        ]
timesteps:  118
action_pos:  -1.2
action_vel:  2.2858124375343323
fx_average:  3.0786342646878215
fy_average:  -0.0819129594645862
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.0786342646878215
force_error_y:  -0.0819129594645862
sobolev_term:  0.40099480123819586
REWARD:  -1.5615420253906036
--------
observation:  [-0.34807862  2.          0.         -1.          0.28581244 -1.2
  0.90022359 -1.2         1.42923903  3.          2.28581244]
timesteps:  119
action_pos:  -0.00921485424041748
action_vel:  3.0
fx_average:  2.5189143123550224
fy_average:  -0.17593245601756488
target_fo

fx_average:  1.9768751657924453
fy_average:  1.3556831032239085
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.023124834207554734
force_error_y:  1.3556831032239085
sobolev_term:  0.3060938959392755
REWARD:  -1.6849018333707386
--------
observation:  [-0.67252023  2.          0.         -1.         -0.23099044 -0.23265944
  0.58623472 -1.2         1.          1.          1.76900956]
timesteps:  133
action_pos:  -1.0356411695480345
action_vel:  3.0
fx_average:  2.2843736723423054
fy_average:  1.0102472965726743
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2843736723423054
force_error_y:  1.0102472965726743
sobolev_term:  0.32002008167343504
REWARD:  -1.6146410505884148
--------
observation:  [-0.96575869  2.          0.         -0.86303431  1.          0.58623472
 -1.2        -1.03564117  1.          1.76900956  3.        ]
timesteps:  134
action_pos:  -0.14681518077850342
action_vel:  1.14755380153656
fx_average:  3.905699101577411
fy_average:  -0.014282594

action_pos:  1.0588615894317626
action_vel:  1.5838268995285034
fx_average:  2.854460931281638
fy_average:  -0.8053280055901765
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.854460931281638
force_error_y:  -0.8053280055901765
sobolev_term:  0.5726077769051502
REWARD:  -2.2323967137769647
--------
observation:  [ 0.10329698  2.          0.          0.88238466 -0.4161731  -0.30084207
 -1.2         1.05886159  1.          3.          1.5838269 ]
timesteps:  147
action_pos:  -0.3651214599609375
action_vel:  1.452409565448761
fx_average:  2.2763547189177142
fy_average:  -0.13821149825935825
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.27635471891771424
force_error_y:  -0.13821149825935825
sobolev_term:  0.5594837400664537
REWARD:  -0.9740499572435262
--------
observation:  [ 2.64923078e-03  2.00000000e+00  0.00000000e+00 -3.04267883e-01
 -5.47590435e-01 -1.20000000e+00  1.05886159e+00 -3.65121460e-01
  3.00000000e+00  1.58382690e+00  1.45240957e+00]
timesteps:  

fx_average:  1.7227340681244603
fy_average:  0.6768122194203479
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2772659318755397
force_error_y:  0.6768122194203479
sobolev_term:  0.5049561868046684
REWARD:  -1.459034338100556
--------
observation:  [-0.36285366  2.          0.         -0.75349319  0.39806265 -1.2
 -1.2        -0.90419183  3.          1.          2.39806265]
timesteps:  161
action_pos:  -0.8462943077087403
action_vel:  3.0
fx_average:  2.495069797500477
fy_average:  0.49278363540682446
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4950697975004772
force_error_y:  0.49278363540682446
sobolev_term:  0.5185810844962897
REWARD:  -1.5064345174035914
--------
observation:  [-1.02857404  2.          0.         -0.70524526  1.         -1.2
 -0.90419183 -0.84629431  1.          2.39806265  3.        ]
timesteps:  162
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0166479163283235
fy_average:  1.4241032372518887
target_force_x:  2.0
target_force_y:  

fx_average:  2.342719739911048
fy_average:  0.666893902988557
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3427197399110482
force_error_y:  0.666893902988557
sobolev_term:  0.5157877062989483
REWARD:  -1.5254013491985536
--------
observation:  [-0.41037734  2.          0.         -0.37210503  0.0874465   1.02599645
 -1.2        -0.44652604  1.34200042  2.38930166  2.0874465 ]
timesteps:  176
action_pos:  -0.6718572378158569
action_vel:  2.6168617010116577
fx_average:  2.743136446634816
fy_average:  -0.030475035049359084
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.743136446634816
force_error_y:  -0.030475035049359084
sobolev_term:  0.44458825890973824
REWARD:  -1.2181997405939133
--------
observation:  [-0.60089362  2.          0.         -0.55988103  0.6168617  -1.2
 -0.44652604 -0.67185724  2.38930166  2.0874465   2.6168617 ]
timesteps:  177
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8940515575699108
fy_average:  0.5292929160542938
target_force_x:

fx_average:  1.1823922680263164
fy_average:  0.9841053211756957
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8176077319736836
force_error_y:  0.9841053211756957
sobolev_term:  0.4793222854629384
REWARD:  -2.281035338612318
--------
observation:  [-0.43096809  2.          0.         -0.63487995  0.67248875 -0.73282399
 -1.2        -0.76185594  2.58418     1.24270809  2.67248875]
timesteps:  189
action_pos:  -1.2
action_vel:  1.2131370902061462
fx_average:  0.14699150950817638
fy_average:  1.6257364894098156
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.8530084904918236
force_error_y:  1.6257364894098156
sobolev_term:  0.4416331833687346
REWARD:  -3.9203781632703736
--------
observation:  [-1.06822749  2.          0.         -1.         -0.78686291 -1.2
 -0.76185594 -1.2         1.24270809  2.67248875  1.21313709]
timesteps:  190
action_pos:  -0.15015360116958618
action_vel:  2.074120879173279
fx_average:  2.44983197948465
fy_average:  -0.1185572743714485
t

fx_average:  2.6068030982544346
fy_average:  -0.03162039731059394
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6068030982544346
force_error_y:  -0.03162039731059394
sobolev_term:  0.5176175926000159
REWARD:  -1.1560410881650445
--------
observation:  [-0.7071094   2.          0.         -0.68832105 -0.17731504 -1.2
 -0.33470224 -0.82598526  2.53734088  2.38175288  1.82268496]
timesteps:  204
action_pos:  -0.34046409130096433
action_vel:  1.9662073254585266
fx_average:  2.941267053542327
fy_average:  -0.530708888156559
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9412670535423269
force_error_y:  -0.530708888156559
sobolev_term:  0.5035962975590177
REWARD:  -1.9755722392579038
--------
observation:  [-0.34197212  2.          0.         -0.28372008 -0.03379267 -0.33470224
 -0.82598526 -0.34046409  2.38175288  1.82268496  1.96620733]
timesteps:  205
action_pos:  -0.8722305536270142
action_vel:  3.0
fx_average:  1.6998032067208773
fy_average:  0.597680958016570

fx_average:  2.356490432254365
fy_average:  -0.4647920327862747
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.35649043225436516
force_error_y:  -0.4647920327862747
sobolev_term:  0.3917755256898643
REWARD:  -1.2130579907305041
--------
observation:  [-0.56915814  2.          0.         -1.          1.         -1.2
 -0.74731886 -1.2         3.          3.          3.        ]
timesteps:  217
action_pos:  -1.1386202573776245
action_vel:  2.6113757491111755
fx_average:  2.187056235989153
fy_average:  -0.08531857472874634
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.18705623598915322
force_error_y:  -0.08531857472874634
sobolev_term:  0.45117976640202123
REWARD:  -0.7235545771199208
--------
observation:  [-1.08270735  2.          0.         -0.94885021  0.61137575 -0.74731886
 -1.2        -1.13862026  3.          3.          2.61137575]
timesteps:  218
action_pos:  0.060801565647125244
action_vel:  2.62985360622406
fx_average:  1.9270633816325153
fy_average:  

fx_average:  2.4328080081293546
fy_average:  1.0254602561608601
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4328080081293546
force_error_y:  1.0254602561608601
sobolev_term:  0.6310753981322348
REWARD:  -2.0893436624224497
--------
observation:  [-1.17483587  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.68626964  3.          3.        ]
timesteps:  232
action_pos:  -0.6014908790588379
action_vel:  1.8586934208869934
fx_average:  2.927720873764285
fy_average:  0.37248176864085364
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9277208737642848
force_error_y:  0.37248176864085364
sobolev_term:  0.5735957755275451
REWARD:  -1.8737984179326836
--------
observation:  [-1.00645073  2.          0.         -0.5012424  -0.14130658 -1.2
 -1.2        -0.60149088  3.          3.          1.85869342]
timesteps:  233
action_pos:  -1.2
action_vel:  1.9356783032417297
fx_average:  2.327818056512296
fy_average:  0.8528087989789946
target_fo

action_pos:  -0.2513574242591858
action_vel:  1.2802848815917969
fx_average:  2.7861148099641015
fy_average:  -0.9477151930749551
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7861148099641015
force_error_y:  -0.9477151930749551
sobolev_term:  0.33203553451650425
REWARD:  -2.065865537555561
--------
observation:  [-0.32479581  2.          0.         -0.20946452 -0.71971512 -0.86464691
 -1.16746531 -0.25135742  2.08671024  2.24068457  1.28028488]
timesteps:  245
action_pos:  0.08835368156433106
action_vel:  3.0
fx_average:  2.9175553015723596
fy_average:  0.042364842275313584
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9175553015723596
force_error_y:  0.042364842275313584
sobolev_term:  0.3277346677642026
REWARD:  -1.2876548116118758
--------
observation:  [ 0.11203289  2.          0.          0.07362807  1.         -1.16746531
 -0.25135742  0.08835368  2.24068457  1.28028488  3.        ]
timesteps:  246
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.062

fx_average:  2.2219270189399043
fy_average:  0.6821768874265938
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2219270189399043
force_error_y:  0.6821768874265938
sobolev_term:  0.4147973957656583
REWARD:  -1.3189013021321565
--------
observation:  [ 0.05380511  2.          0.          0.30961579  0.4758029  -1.2
 -1.2         0.37153895  1.47145915  1.56882912  2.4758029 ]
timesteps:  9
action_pos:  -1.2
action_vel:  2.869720697402954
fx_average:  2.579833071605944
fy_average:  -0.07965639395442395
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5798330716059441
force_error_y:  -0.07965639395442395
sobolev_term:  0.48111144069529715
REWARD:  -1.1406009062556652
--------
observation:  [-0.2693233   2.          0.         -1.          0.8697207  -1.2
  0.37153895 -1.2         1.56882912  2.4758029   2.8697207 ]
timesteps:  10
action_pos:  -0.42676180601119995
action_vel:  2.3253610730171204
fx_average:  2.1854491352835024
fy_average:  0.46478481421365603
target_

fx_average:  2.2997618327048426
fy_average:  0.6208564636506843
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2997618327048426
force_error_y:  0.6208564636506843
sobolev_term:  0.6234767600663944
REWARD:  -1.5440950564219214
--------
observation:  [ 0.19200668  2.          0.          0.12607342  1.         -1.2
  0.2122432   0.1512881   1.38313168  3.          3.        ]
timesteps:  24
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_rew_mean          | -478        |
| time/                   |             |
|    fps                  | 6           |
|    iterations           | 32          |
|    time_elapsed         | 150         |
|    total_timesteps      | 1024        |
| train/                  |             |
|    approx_kl            | 0.042239755 |
|    clip_fraction        | 0.253       |
|    clip_range           | 0.2         |
|    entropy_loss         | -2.8        |
|    ex

fx_average:  2.415424446259223
fy_average:  -0.8348115996831225
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.41542444625922315
force_error_y:  -0.8348115996831225
sobolev_term:  0.5106135479148363
REWARD:  -1.760849593857182
--------
observation:  [-1.09445415  2.          0.         -1.          0.31891233  0.4643584
 -1.2        -1.2         2.04309335  3.          2.31891233]
timesteps:  37
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.6612896391771557
fy_average:  -0.8825072366185531
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6612896391771557
force_error_y:  -0.8825072366185531
sobolev_term:  0.5003302536167575
REWARD:  -2.044127129412466
--------
observation:  [-1.17116692  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.31891233  3.        ]
timesteps:  38
action_pos:  -1.2
action_vel:  2.414807230234146
fx_average:  1.0446193182435297
fy_average:  -0.20000629941044648
target_force_x:  2.0
target_forc

fx_average:  1.854724667911216
fy_average:  0.7203249416253074
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.14527533208878407
force_error_y:  0.7203249416253074
sobolev_term:  0.45043647831827577
REWARD:  -1.3160367520323673
--------
observation:  [-0.03856302  2.          0.          0.44085112 -0.42917523 -1.2
 -1.2         0.52902135  2.41706201  1.          1.57082477]
timesteps:  52
action_pos:  -1.2
action_vel:  2.1972056478261948
fx_average:  1.985400136721809
fy_average:  1.5003931335557092
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.014599863278190961
force_error_y:  1.5003931335557092
sobolev_term:  0.5048700798829802
REWARD:  -2.0198630767168804
--------
observation:  [-0.55876351  2.          0.         -1.          0.19720565 -1.2
  0.52902135 -1.2         1.          1.57082477  2.19720565]
timesteps:  53
action_pos:  -0.8929022312164306
action_vel:  2.749984562397003
fx_average:  2.2574629322452204
fy_average:  1.4876934534957245
target_f

fx_average:  2.3922566544906676
fy_average:  0.9364120700443311
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3922566544906676
force_error_y:  0.9364120700443311
sobolev_term:  0.49520325012415994
REWARD:  -1.8238719746591585
--------
observation:  [-0.50081709  2.          0.         -0.21614587  0.67864913 -1.2
 -1.2        -0.25937505  3.          2.77279723  2.67864913]
timesteps:  65
action_pos:  0.2769121170043945
action_vel:  3.0
fx_average:  2.8739368036277395
fy_average:  -0.4721786322260512
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8739368036277395
force_error_y:  -0.4721786322260512
sobolev_term:  0.46449322342932226
REWARD:  -1.810608659283113
--------
observation:  [ 0.1354929   2.          0.          0.2307601   1.         -1.2
 -0.25937505  0.27691212  2.77279723  2.67864913  3.        ]
timesteps:  66
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.277881269287112
fy_average:  -0.029195407537903294
target_force_x:  2.0
target_force_y: 

fx_average:  1.6688030731719754
fy_average:  0.3658361264192847
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3311969268280246
force_error_y:  0.3658361264192847
sobolev_term:  0.4505221959280285
REWARD:  -1.1475552491753378
--------
observation:  [-1.26967155  2.          0.         -1.          1.         -1.03365569
 -1.2        -1.2         1.59125727  1.36791927  3.        ]
timesteps:  80
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8992287530603258
fy_average:  0.16601463736372868
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.10077124693967421
force_error_y:  0.16601463736372868
sobolev_term:  0.42638280816145396
REWARD:  -0.6931686924648568
--------
observation:  [-1.25362892  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.36791927  3.          3.        ]
timesteps:  81
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4002947068290494
fy_average:  0.48197077374442765
target_force_x:  2.0
target_force_y:  0.0


fx_average:  2.5640499367879683
fy_average:  -0.4828327478876982
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5640499367879683
force_error_y:  -0.4828327478876982
sobolev_term:  0.5728570271161276
REWARD:  -1.619739711791794
--------
observation:  [ 0.07452417  2.          0.          0.24181128  0.56719708 -0.10535531
 -1.2         0.29017353  1.          3.          2.56719708]
timesteps:  93
action_pos:  -1.2
action_vel:  1.0
fx_average:  2.503800415826469
fy_average:  -0.014412087747390103
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5038004158264688
force_error_y:  -0.014412087747390103
sobolev_term:  0.6218368082433461
REWARD:  -1.140049311817205
--------
observation:  [-0.38466231  2.          0.         -1.         -1.         -1.2
  0.29017353 -1.2         3.          2.56719708  1.        ]
timesteps:  94
action_pos:  -0.15781781673431397
action_vel:  1.0
fx_average:  3.165072291615995
fy_average:  -0.2542011817286246
target_force_x:  2.0
target_

fx_average:  2.2058762765349367
fy_average:  -0.10620962045169587
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.20587627653493668
force_error_y:  -0.10620962045169587
sobolev_term:  0.40653996882626525
REWARD:  -0.7186258658128978
--------
observation:  [-1.23548351  2.          0.         -1.          0.55302137 -1.02056909
 -1.2        -1.2         3.          3.          2.55302137]
timesteps:  108
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.3841165207180195
fy_average:  0.04189158277688268
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6158834792819805
force_error_y:  0.04189158277688268
sobolev_term:  0.3240269704241468
REWARD:  -0.9818020324830099
--------
observation:  [-1.21747916  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.55302137  3.        ]
timesteps:  109
action_pos:  0.06283764839172364
action_vel:  2.212761700153351
fx_average:  2.70498510868088
fy_average:  -0.496292246522566
target_forc

action_pos:  -1.2
action_vel:  2.529507875442505
fx_average:  0.6305004799129894
fy_average:  0.8659403921027903
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3694995200870106
force_error_y:  0.8659403921027903
sobolev_term:  0.34416975372412145
REWARD:  -2.5796096659139223
--------
observation:  [-1.22285707  2.          0.         -1.          0.52950788 -1.2
 -1.2        -1.2         1.          3.          2.52950788]
timesteps:  121
action_pos:  -0.4747992753982544
action_vel:  3.0
fx_average:  1.905110382790529
fy_average:  0.5598405975171222
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.09488961720947109
force_error_y:  0.5598405975171222
sobolev_term:  0.41095866804934317
REWARD:  -1.0656888827759365
--------
observation:  [-0.91800833  2.          0.         -0.39566606  1.         -1.2
 -1.2        -0.47479928  3.          2.52950788  3.        ]
timesteps:  122
action_pos:  -1.2
action_vel:  1.516802966594696
fx_average:  1.999282463935584
fy_av

fx_average:  0.09879889224222173
fy_average:  1.5369705859284875
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.9012011077577782
force_error_y:  1.5369705859284875
sobolev_term:  0.41146883228899794
REWARD:  -3.8496405259752633
--------
observation:  [-1.08225003  2.          0.         -1.         -0.21038646 -1.2
 -0.72951686 -1.2         3.          2.01367688  1.78961354]
timesteps:  136
action_pos:  -1.2
action_vel:  1.165294885635376
fx_average:  0.4342415456038854
fy_average:  1.393062785965238
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5657584543961147
force_error_y:  1.393062785965238
sobolev_term:  0.40248406686028015
REWARD:  -3.361305307221633
--------
observation:  [-1.06362818  2.          0.         -1.         -0.83470511 -0.72951686
 -1.2        -1.2         2.01367688  1.78961354  1.16529489]
timesteps:  137
action_pos:  -1.2
action_vel:  1.766108512878418
fx_average:  0.4467546772411852
fy_average:  0.9553338370788352
target_force_x:  

fx_average:  2.035399455070677
fy_average:  0.37502096190252815
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.0353994550706771
force_error_y:  0.37502096190252815
sobolev_term:  0.34151835973877276
REWARD:  -0.7519387767119781
--------
observation:  [-1.17345846  2.          0.         -1.          0.51581264 -0.94892678
 -1.2        -1.2         2.09672177  3.          2.51581264]
timesteps:  151
action_pos:  -0.9977045059204102
action_vel:  1.583501935005188
fx_average:  1.4927577754219339
fy_average:  0.024839490862320734
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5072422245780661
force_error_y:  0.024839490862320734
sobolev_term:  0.23564882463745826
REWARD:  -0.7677305400778451
--------
observation:  [-1.27290317  2.          0.         -0.83142042 -0.41649806 -1.2
 -1.2        -0.99770451  3.          2.51581264  1.58350194]
timesteps:  152
------------------------------------------
| rollout/                |              |
|    ep_len_mean       

fx_average:  1.3528588718729375
fy_average:  1.1893753189936007
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6471411281270625
force_error_y:  1.1893753189936007
sobolev_term:  0.5073688570685548
REWARD:  -2.343885304189218
--------
observation:  [-1.29607408  2.          0.         -1.          1.         -1.2
 -1.05846527 -1.2         1.80917889  2.33139282  3.        ]
timesteps:  164
action_pos:  -1.2
action_vel:  2.5827757120132446
fx_average:  1.596681355887678
fy_average:  0.945034200884244
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.403318644112322
force_error_y:  0.945034200884244
sobolev_term:  0.4833601679165657
REWARD:  -1.8317130129131316
--------
observation:  [-1.21457895  2.          0.         -1.          0.58277571 -1.05846527
 -1.2        -1.2         2.33139282  3.          2.58277571]
timesteps:  165
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.723407149769888
fy_average:  0.030093068928914227
target_force_x:  2.0
target_force_

fx_average:  3.2920037294135316
fy_average:  0.14216915593364396
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.2920037294135316
force_error_y:  0.14216915593364396
sobolev_term:  0.6476337572779111
REWARD:  -2.0818066426250867
--------
observation:  [-0.64523989  2.          0.         -0.55379623  1.         -0.08948307
 -1.2        -0.66455548  3.          3.          3.        ]
timesteps:  179
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4368327123000333
fy_average:  0.9031089461893108
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5631672876999667
force_error_y:  0.9031089461893108
sobolev_term:  0.6388807832194342
REWARD:  -2.1051570171087115
--------
observation:  [-0.53882078  2.          0.         -1.          1.         -1.2
 -0.66455548 -1.2         3.          3.          3.        ]
timesteps:  180
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.2149549416315462
fy_average:  1.888131121934235
target_force_x:  2.0
target_force_y:  0.0
for

fx_average:  1.290035416404899
fy_average:  0.2484416995218602
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.709964583595101
force_error_y:  0.2484416995218602
sobolev_term:  0.21546502459423245
REWARD:  -1.1738713077111935
--------
observation:  [-1.16622101  2.          0.         -1.          0.92475855 -1.2
 -1.2        -1.2         2.74753606  2.19754398  2.92475855]
timesteps:  192
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1473553852552125
fy_average:  0.2995579492759434
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8526446147447875
force_error_y:  0.2995579492759434
sobolev_term:  0.18295433976995407
REWARD:  -1.335156903790685
--------
observation:  [-1.42641268  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.19754398  2.92475855  3.        ]
timesteps:  193
action_pos:  -1.2
action_vel:  2.52206951379776
fx_average:  1.3151408583832653
fy_average:  0.22269460470796498
target_force_x:  2.0
target_force_y:  0

fx_average:  3.248746444269888
fy_average:  0.8869598661087794
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.2487464442698881
force_error_y:  0.8869598661087794
sobolev_term:  0.7156963614177305
REWARD:  -2.8514026717963983
--------
observation:  [-0.99736109  2.          0.         -1.          0.07870448  0.12254505
 -1.2        -1.2         3.          2.26135862  2.07870448]
timesteps:  207
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4522919434169406
fy_average:  1.3625687213224087
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5477080565830594
force_error_y:  1.3625687213224087
sobolev_term:  0.6835515746100753
REWARD:  -2.5938283525155432
--------
observation:  [-1.2207648   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.26135862  2.07870448  3.        ]
timesteps:  208
action_pos:  -1.2
action_vel:  1.5859431028366089
fx_average:  2.058980317095859
fy_average:  0.951960152576015
target_force_x:  2.0
target_force_

fx_average:  1.6490701903182996
fy_average:  0.27275944261408375
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3509298096817004
force_error_y:  0.27275944261408375
sobolev_term:  0.24637930733033125
REWARD:  -0.8700685596261155
--------
observation:  [-1.04270499  2.          0.         -1.         -0.18931222 -1.2
 -1.2        -1.2         1.47798181  2.17999583  1.81068778]
timesteps:  220
action_pos:  -1.2
action_vel:  2.3981858789920807
fx_average:  1.2648604598135882
fy_average:  0.264986522976254
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7351395401864118
force_error_y:  0.264986522976254
sobolev_term:  0.23137721524004928
REWARD:  -1.2315032784027151
--------
observation:  [-1.20647888  2.          0.         -1.          0.39818588 -1.2
 -1.2        -1.2         2.17999583  1.81068778  2.39818588]
timesteps:  221
action_pos:  0.02370114326477051
action_vel:  3.0
fx_average:  2.4006841176679314
fy_average:  -0.390872020183236
target_force_x:  2.0

fx_average:  -0.09563615032097299
fy_average:  0.7921751116477483
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -2.095636150320973
force_error_y:  0.7921751116477483
sobolev_term:  0.23339760937472392
REWARD:  -3.121208871343445
--------
observation:  [-1.33523836  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.83543801  2.47677758  3.        ]
timesteps:  235
action_pos:  -1.2
action_vel:  2.6139299869537354
fx_average:  0.8331021874906805
fy_average:  0.3202500579529134
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1668978125093195
force_error_y:  0.3202500579529134
sobolev_term:  0.1784314152191689
REWARD:  -1.6655792856814016
--------
observation:  [-1.20023236  2.          0.         -1.          0.61392999 -1.2
 -1.2        -1.2         2.47677758  3.          2.61392999]
timesteps:  236
action_pos:  -1.2
action_vel:  1.3422365188598633
fx_average:  1.6618996747153894
fy_average:  -0.12423175178999933
target_force_x:  2.

action_pos:  -1.2
action_vel:  2.088958263397217
fx_average:  1.384323497971525
fy_average:  0.7015983003522994
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6156765020284749
force_error_y:  0.7015983003522994
sobolev_term:  0.4736047232242039
REWARD:  -1.790879525604978
--------
observation:  [-1.09290889  2.          0.         -1.          0.08895826 -0.54012251
 -1.2        -1.2         2.0484308   3.          2.08895826]
timesteps:  249
action_pos:  -0.06274337768554687
action_vel:  3.0
fx_average:  1.351567208344985
fy_average:  0.20869170543562338
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.648432791655015
force_error_y:  0.20869170543562338
sobolev_term:  0.47404408696239947
REWARD:  -1.3311685840530378
--------
observation:  [-0.3060472   2.          0.         -0.05228615  1.         -1.2
 -1.2        -0.06274338  3.          2.08895826  3.        ]
timesteps:  250
-----------RESET!!!!!!!!!!!!!-----------
action_pos:  0.6763866662979126
action_

fx_average:  3.440231794651123
fy_average:  -1.2270457708700675
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.440231794651123
force_error_y:  -1.2270457708700675
sobolev_term:  0.6138577472494356
REWARD:  -3.2811353127706258
--------
observation:  [ 0.14770683  2.          0.          0.11172616  1.          1.2
 -0.18810883  0.1340714   3.          3.          3.        ]
timesteps:  14
action_pos:  -0.8656063556671142
action_vel:  3.0
fx_average:  2.0301034323425
fy_average:  -0.39647138228634154
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.030103432342500014
force_error_y:  -0.39647138228634154
sobolev_term:  0.5528663211204079
REWARD:  -0.9794411357492494
--------
observation:  [ 0.05376925  2.          0.         -0.72133863  1.         -0.18810883
  0.1340714  -0.86560636  3.          3.          3.        ]
timesteps:  15
action_pos:  -1.2
action_vel:  1.2278658747673035
fx_average:  1.9850064399037368
fy_average:  0.4675988476022492
target_force_x: 

fx_average:  3.1089668240090487
fy_average:  0.6680312975645143
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.1089668240090487
force_error_y:  0.6680312975645143
sobolev_term:  0.6118713895612503
REWARD:  -2.3888695111348133
--------
observation:  [-0.90937069  2.          0.         -1.          0.91925859  0.76653557
 -1.2        -1.2         3.          2.27707115  2.91925859]
timesteps:  29
action_pos:  -1.2
action_vel:  3.0
fx_average:  3.435257548549271
fy_average:  0.057147749426758876
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.435257548549271
force_error_y:  0.057147749426758876
sobolev_term:  0.6110683737735454
REWARD:  -2.1034736717495752
--------
observation:  [-1.09345699  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.27707115  2.91925859  3.        ]
timesteps:  30
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 250        |
|    ep_rew_mean       

fx_average:  0.8854981968821035
fy_average:  0.10183653799088162
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1145018031178964
force_error_y:  0.10183653799088162
sobolev_term:  0.1924908010127205
REWARD:  -1.4088291421214985
--------
observation:  [-1.20696102  2.          0.         -1.          1.         -1.01175478
 -1.2        -1.2         1.63173378  1.91936094  3.        ]
timesteps:  42
action_pos:  -1.2
action_vel:  2.8863434195518494
fx_average:  1.0731324523276817
fy_average:  0.047208872315585765
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9268675476723183
force_error_y:  0.047208872315585765
sobolev_term:  0.1752371989360588
REWARD:  -1.1493136189239628
--------
observation:  [-1.38715533  2.          0.         -1.          0.88634342 -1.2
 -1.2        -1.2         1.91936094  3.          2.88634342]
timesteps:  43
action_pos:  -1.2
action_vel:  2.9589773416519165
fx_average:  1.2659379780898352
fy_average:  -0.007858246464673388
target_f

fx_average:  1.0300529781000405
fy_average:  0.566418889540254
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9699470218999595
force_error_y:  0.566418889540254
sobolev_term:  0.2624236671943799
REWARD:  -1.7987895786345933
--------
observation:  [-1.4632114   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.01674742  3.          3.        ]
timesteps:  57
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.95384850943672
fy_average:  0.012802231552096957
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.04615149056328005
force_error_y:  0.012802231552096957
sobolev_term:  0.18239203154496225
REWARD:  -0.24134575366033925
--------
observation:  [-1.26817794  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  58
action_pos:  -1.2
action_vel:  2.3983603417873383
fx_average:  1.782116466700533
fy_average:  0.1302608575859842
target_force_x:  2.0
target_force_y: 

fx_average:  2.294950936195767
fy_average:  -0.4507001760333864
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2949509361957672
force_error_y:  -0.4507001760333864
sobolev_term:  0.4394204848138675
REWARD:  -1.185071597043021
--------
observation:  [-0.14089949  2.          0.         -0.19866776  0.84186429 -0.67382269
  0.59967871 -0.23840132  3.          1.53821987  2.84186429]
timesteps:  70
action_pos:  -1.2
action_vel:  2.936862289905548
fx_average:  2.160455916515416
fy_average:  -0.7271180246773526
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.1604559165154158
force_error_y:  -0.7271180246773526
sobolev_term:  0.435354109733057
REWARD:  -1.3229280509258254
--------
observation:  [-0.80389313  2.          0.         -1.          0.93686229  0.59967871
 -0.23840132 -1.2         1.53821987  2.84186429  2.93686229]
timesteps:  71
action_pos:  -1.2
action_vel:  1.4797603487968445
fx_average:  1.9825870472991327
fy_average:  -0.47817937960445844
target_forc

fx_average:  1.853157936368812
fy_average:  0.9184337759801134
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.146842063631188
force_error_y:  0.9184337759801134
sobolev_term:  0.24817727316657703
REWARD:  -1.3134531127778786
--------
observation:  [-0.75997485  2.          0.         -1.         -1.         -0.1391377
 -1.2        -1.2         1.06002867  1.51295549  1.        ]
timesteps:  85
action_pos:  -0.7123796224594116
action_vel:  3.0
fx_average:  1.8941675684353838
fy_average:  0.7864299078929307
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.10583243156461619
force_error_y:  0.7864299078929307
sobolev_term:  0.261646317888821
REWARD:  -1.153908657346368
--------
observation:  [-0.53447698  2.          0.         -0.59364969  1.         -1.2
 -1.2        -0.71237962  1.51295549  1.          3.        ]
timesteps:  86
action_pos:  -1.2
action_vel:  2.604055106639862
fx_average:  0.9688947610598168
fy_average:  1.1767833827455516
target_force_x:  2.0


fx_average:  2.3067143418708036
fy_average:  1.054770804433715
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3067143418708036
force_error_y:  1.054770804433715
sobolev_term:  0.3725583152559769
REWARD:  -1.7340434615604954
--------
observation:  [-1.33642762  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  98
action_pos:  -1.2
action_vel:  2.9769914150238037
fx_average:  2.127344318373936
fy_average:  1.0468447881991039
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.127344318373936
force_error_y:  1.0468447881991039
sobolev_term:  0.3365189714975465
REWARD:  -1.5107080780705864
--------
observation:  [-1.22920162  2.          0.         -1.          0.97699142 -1.2
 -1.2        -1.2         3.          3.          2.97699142]
timesteps:  99
action_pos:  -0.2905292987823486
action_vel:  3.0
fx_average:  2.574237221852535
fy_average:  0.627745607937584
target_force_x:  2.0
target_forc

fx_average:  0.9785468866512135
fy_average:  1.4576220287914659
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0214531133487865
force_error_y:  1.4576220287914659
sobolev_term:  0.4957265762875861
REWARD:  -2.9748017184278384
--------
observation:  [-0.59134826  2.          0.         -1.          1.         -0.40743785
 -1.2        -1.2         2.81702435  1.05717844  3.        ]
timesteps:  113
action_pos:  -1.1423051118850707
action_vel:  1.2085064053535461
fx_average:  0.5190891820699037
fy_average:  1.7744965346820565
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4809108179300963
force_error_y:  1.7744965346820565
sobolev_term:  0.5229566370637783
REWARD:  -3.778363989675931
--------
observation:  [-1.0469659   2.          0.         -0.95192093 -0.79149359 -1.2
 -1.2        -1.14230511  1.05717844  3.          1.20850641]
timesteps:  114
action_pos:  -0.7718228101730347
action_vel:  1.9143646359443665
fx_average:  0.9873018706010032
fy_average:  1.710

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.7036858352785331
fy_average:  0.15507347496127682
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2963141647214669
force_error_y:  0.15507347496127682
sobolev_term:  0.3292377356320806
REWARD:  -0.7806253753148242
--------
observation:  [-1.09305396  2.          0.         -1.          1.         -0.49219923
 -1.2        -1.2         3.          2.25009689  3.        ]
timesteps:  127
action_pos:  -1.2
action_vel:  1.9901450276374817
fx_average:  1.9249528883228406
fy_average:  0.22281558298019385
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.07504711167715938
force_error_y:  0.22281558298019385
sobolev_term:  0.3319529598776336
REWARD:  -0.6298156545349869
--------
observation:  [-1.23927818  2.          0.         -1.         -0.00985497 -1.2
 -1.2        -1.2         2.25009689  3.          1.99014503]
timesteps:  128
action_pos:  -1.2
action_vel:  2.0390971899032593
fx_average:  2.1652131383298614
fy_aver

action_pos:  -1.1016746520996092
action_vel:  2.736340820789337
fx_average:  1.477837463582984
fy_average:  0.9707127601001972
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5221625364170159
force_error_y:  0.9707127601001972
sobolev_term:  0.38627481202163577
REWARD:  -1.879150108538849
--------
observation:  [-1.37106152  2.          0.         -0.91806221  0.73634082 -1.2
 -1.2        -1.10167465  1.          3.          2.73634082]
timesteps:  142
action_pos:  -1.1301956176757812
action_vel:  3.0
fx_average:  1.4915317866322224
fy_average:  0.9235505454393214
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5084682133677776
force_error_y:  0.9235505454393214
sobolev_term:  0.40370685252036076
REWARD:  -1.8357256113274598
--------
observation:  [-1.1574499   2.          0.         -0.94182968  1.         -1.2
 -1.10167465 -1.13019562  3.          2.73634082  3.        ]
timesteps:  143
action_pos:  -1.0451023578643799
action_vel:  2.8322561979293823
fx_aver

fx_average:  3.4155869983851366
fy_average:  0.5059182895622526
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4155869983851366
force_error_y:  0.5059182895622526
sobolev_term:  0.428350091957024
REWARD:  -2.3498553799044135
--------
observation:  [-0.18320207  2.          0.          0.10262823 -0.59171379 -1.2
 -1.2         0.12315388  2.94013876  1.74039066  1.40828621]
timesteps:  157
action_pos:  -1.2
action_vel:  3.0
fx_average:  3.3161321338985745
fy_average:  0.7071042709805083
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3161321338985745
force_error_y:  0.7071042709805083
sobolev_term:  0.5076924285312624
REWARD:  -2.5309288334103455
--------
observation:  [-0.40300897  2.          0.         -1.          1.         -1.2
  0.12315388 -1.2         1.74039066  1.40828621  3.        ]
timesteps:  158
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_rew_mean         

fx_average:  1.682620119034491
fy_average:  1.862222940330694
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.31737988096550906
force_error_y:  1.862222940330694
sobolev_term:  0.5034100671478932
REWARD:  -2.6830128884440962
--------
observation:  [-1.39004139  2.          0.         -1.         -0.84053844 -1.2
 -1.2        -1.2         2.41401264  2.68853849  1.15946156]
timesteps:  170
action_pos:  0.3351154804229736
action_vel:  3.0
fx_average:  2.441118464789137
fy_average:  0.20547261428402255
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.44111846478913685
force_error_y:  0.20547261428402255
sobolev_term:  0.3889214289205228
REWARD:  -1.0355125079936822
--------
observation:  [-0.5701096   2.          0.          0.2792629   1.         -1.2
 -1.2         0.33511548  2.68853849  1.15946156  3.        ]
timesteps:  171
action_pos:  -1.2
action_vel:  1.3634228706359863
fx_average:  2.1023388802402927
fy_average:  0.2944218345469718
target_force_x:  2.0
tar

fx_average:  1.3562417423045297
fy_average:  -0.12135125350133343
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6437582576954703
force_error_y:  -0.12135125350133343
sobolev_term:  0.2391445160378945
REWARD:  -1.0042540272346983
--------
observation:  [-1.73515681  2.          0.         -1.         -1.         -1.2
 -0.81645141 -1.2         3.          3.          1.        ]
timesteps:  185
action_pos:  -1.2
action_vel:  1.735109269618988
fx_average:  1.5235499221401194
fy_average:  0.05146192068257182
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4764500778598806
force_error_y:  0.05146192068257182
sobolev_term:  0.2998931546991588
REWARD:  -0.8278051532416112
--------
observation:  [-1.78791196  2.          0.         -1.         -0.26489073 -0.81645141
 -1.2        -1.2         3.          1.          1.73510927]
timesteps:  186
action_pos:  -1.2
action_vel:  2.1765230894088745
fx_average:  2.017320162212579
fy_average:  0.1612620331358832
target_forc

fx_average:  1.1931778598779665
fy_average:  0.2180478086397004
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8068221401220335
force_error_y:  0.2180478086397004
sobolev_term:  0.19973996292015236
REWARD:  -1.2246099116818863
--------
observation:  [-1.2784795   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.98517549  3.        ]
timesteps:  198
action_pos:  -1.0442582130432128
action_vel:  2.0608660876750946
fx_average:  1.4868908367808051
fy_average:  0.2750279671546915
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5131091632191949
force_error_y:  0.2750279671546915
sobolev_term:  0.25263731171747994
REWARD:  -1.0407744420913665
--------
observation:  [-1.41677099  2.          0.         -0.87021518  0.06086609 -1.2
 -1.2        -1.04425821  2.98517549  3.          2.06086609]
timesteps:  199
action_pos:  -1.2
action_vel:  1.6204963326454163
fx_average:  1.4921664764932174
fy_average:  0.21953939383536522
targ

fx_average:  0.9054463007094093
fy_average:  1.4531229575095699
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0945536992905907
force_error_y:  1.4531229575095699
sobolev_term:  0.46447556595493106
REWARD:  -3.0121522227550916
--------
observation:  [-1.26078198  2.          0.         -1.          0.09269622 -1.2
 -1.05491545 -1.2         3.          2.10892552  2.09269622]
timesteps:  213
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6556836521250222
fy_average:  0.8748999866750968
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3443163478749778
force_error_y:  0.8748999866750968
sobolev_term:  0.4428368606699371
REWARD:  -1.6620531952200117
--------
observation:  [-1.25381584  2.          0.         -1.          1.         -1.05491545
 -1.2        -1.2         2.10892552  2.09269622  3.        ]
timesteps:  214
action_pos:  -0.9947949886322021
action_vel:  1.8206342458724976
fx_average:  1.6800269050845171
fy_average:  0.169498203899111
target_force_x:

fx_average:  1.276451269561112
fy_average:  0.22045206892611693
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7235487304388879
force_error_y:  0.22045206892611693
sobolev_term:  0.17103723495671347
REWARD:  -1.1150380343217183
--------
observation:  [-1.13120871  2.          0.         -0.91006285  0.47361624 -1.2
 -1.2        -1.09207542  2.22664839  2.03280646  2.47361624]
timesteps:  226
action_pos:  -0.08373899459838867
action_vel:  1.9947476387023926
fx_average:  2.4347228488567936
fy_average:  -0.4099809527964871
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4347228488567936
force_error_y:  -0.4099809527964871
sobolev_term:  0.1998365962490972
REWARD:  -1.0445403979023777
--------
observation:  [-0.27750424  2.          0.         -0.0697825  -0.00525236 -1.2
 -1.09207542 -0.08373899  2.03280646  2.47361624  1.99474764]
timesteps:  227
action_pos:  -1.2
action_vel:  2.3402398228645325
fx_average:  1.833789462150181
fy_average:  -0.14215961385757073
ta

action_pos:  -1.2
action_vel:  1.2006078362464905
fx_average:  1.1125288531180197
fy_average:  0.6928110988801027
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8874711468819803
force_error_y:  0.6928110988801027
sobolev_term:  0.3337941122093777
REWARD:  -1.9140763579714608
--------
observation:  [-1.18306638  2.          0.         -1.         -0.79939216 -1.2
 -1.2        -1.2         1.81213099  2.09774119  1.20060784]
timesteps:  241
action_pos:  -0.3655697822570801
action_vel:  2.2224408984184265
fx_average:  1.6150895028564813
fy_average:  0.317122756486372
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.38491049714351866
force_error_y:  0.317122756486372
sobolev_term:  0.3602142142143857
REWARD:  -1.0622474678442764
--------
observation:  [-0.44409535  2.          0.         -0.30464149  0.2224409  -1.2
 -1.2        -0.36556978  2.09774119  1.20060784  2.2224409 ]
timesteps:  242
action_pos:  -1.2
action_vel:  2.1984755992889404
fx_average:  1.5261710

action_pos:  -1.2
action_vel:  3.0
fx_average:  -0.2109603744819531
fy_average:  1.8773328376847334
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -2.210960374481953
force_error_y:  1.8773328376847334
sobolev_term:  0.46052299803620794
REWARD:  -4.548816210202894
--------
observation:  [-1.43729087  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.28894001  3.          3.        ]
timesteps:  5
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.26062966313778313
fy_average:  1.4392548336593878
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7393703368622169
force_error_y:  1.4392548336593878
sobolev_term:  0.4357004832812662
REWARD:  -3.614325653802871
--------
observation:  [-1.31658085  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  6
action_pos:  -1.2
action_vel:  2.1948676705360413
fx_average:  0.7856210366919845
fy_average:  1.1408807002844632
target

fx_average:  1.320725338208496
fy_average:  0.10645436679656776
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.679274661791504
force_error_y:  0.10645436679656776
sobolev_term:  0.18107788818027057
REWARD:  -0.9668069167683423
--------
observation:  [-1.27275466  2.          0.         -1.          0.10505557 -1.2
 -1.2        -1.2         2.48467967  2.94180828  2.10505557]
timesteps:  20
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1810346791064705
fy_average:  0.07467072253821373
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8189653208935295
force_error_y:  0.07467072253821373
sobolev_term:  0.13537192574948972
REWARD:  -1.0290079691812328
--------
observation:  [-1.3089791   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.94180828  2.10505557  3.        ]
timesteps:  21
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9402505281528355
fy_average:  0.0142832982027903
target_force_x:  2.0
target_force_y:  0.0
force_er

fx_average:  1.5984842519088942
fy_average:  0.05475575577445664
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4015157480911058
force_error_y:  0.05475575577445664
sobolev_term:  0.22248013999626085
REWARD:  -0.6787516438618233
--------
observation:  [-1.50095851  2.          0.         -1.          0.53576738 -1.2
 -1.2        -1.2         3.          1.40467447  2.53576738]
timesteps:  35
action_pos:  -0.16637792587280273
action_vel:  3.0
fx_average:  2.8640009763960848
fy_average:  -0.024165149734948013
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8640009763960848
force_error_y:  -0.024165149734948013
sobolev_term:  0.28521684200755154
REWARD:  -1.1733829681385843
--------
observation:  [-0.35186216  2.          0.         -0.13864827  1.         -1.2
 -1.2        -0.16637793  1.40467447  2.53576738  3.        ]
timesteps:  36
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
| 

fx_average:  2.739570807914985
fy_average:  -0.03754705721996203
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7395708079149852
force_error_y:  -0.03754705721996203
sobolev_term:  0.3753812288805664
REWARD:  -1.1524990940155135
--------
observation:  [-0.07938333  2.          0.         -1.          1.         -1.2
  0.49940414 -1.2         1.9627924   2.87827098  3.        ]
timesteps:  48
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.4317600530588934
fy_average:  1.1376138592901583
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4317600530588934
force_error_y:  1.1376138592901583
sobolev_term:  0.5897103222672538
REWARD:  -2.1590842346163055
--------
observation:  [-1.0646428   2.          0.         -1.          1.          0.49940414
 -1.2        -1.2         2.87827098  3.          3.        ]
timesteps:  49
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.111220451247168
fy_average:  0.9919263470263338
target_force_x:  2.0
target_force_y:  0.0
force

fx_average:  3.268144262372358
fy_average:  0.5503186584228528
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.268144262372358
force_error_y:  0.5503186584228528
sobolev_term:  0.5914973096958274
REWARD:  -2.409960230491038
--------
observation:  [-1.01783252  2.          0.         -1.          0.59048206 -1.2
 -0.82113762 -1.2         3.          2.32218343  2.59048206]
timesteps:  63
action_pos:  -0.8716930389404297
action_vel:  1.982768952846527
fx_average:  2.7792325952224486
fy_average:  0.6162181754497499
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7792325952224486
force_error_y:  0.6162181754497499
sobolev_term:  0.5689444971121683
REWARD:  -1.9643952677843668
--------
observation:  [-0.87097047  2.          0.         -0.72641087 -0.01723105 -0.82113762
 -1.2        -0.87169304  2.32218343  2.59048206  1.98276895]
timesteps:  64
action_pos:  -1.2
action_vel:  2.1604204773902893
fx_average:  1.6834733962869204
fy_average:  1.8598415102385344
target_f

fx_average:  0.8116573227843099
fy_average:  0.16135045793690575
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1883426772156902
force_error_y:  0.16135045793690575
sobolev_term:  0.29115441289979194
REWARD:  -1.6408475480523879
--------
observation:  [-1.36602014  2.          0.         -1.          0.99188393 -1.2
 -1.2        -1.2         1.          1.          2.99188393]
timesteps:  76
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4484136214772833
fy_average:  0.23703934168682206
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5515863785227166
force_error_y:  0.23703934168682206
sobolev_term:  0.28777749209954406
REWARD:  -2.0764032123090828
--------
observation:  [-1.41293933  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.          2.99188393  3.        ]
timesteps:  77
action_pos:  -1.2
action_vel:  2.32800155878067
fx_average:  0.046037140877575336
fy_average:  0.3140958988715268
target_force_x:  2.0
target_force

fx_average:  2.0226415601417655
fy_average:  0.4172750524220682
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.02264156014176555
force_error_y:  0.4172750524220682
sobolev_term:  0.33087934267018404
REWARD:  -0.7707959552340178
--------
observation:  [-0.66175196  2.          0.         -0.22460496  0.96394861 -1.2
 -1.2        -0.26952596  1.53297806  3.          2.96394861]
timesteps:  91
action_pos:  -1.2
action_vel:  2.159817695617676
fx_average:  1.832479991769003
fy_average:  0.5073561791613689
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.1675200082309971
force_error_y:  0.5073561791613689
sobolev_term:  0.3450157382320555
REWARD:  -1.0198919256244214
--------
observation:  [-0.6176545   2.          0.         -1.          0.1598177  -1.2
 -0.26952596 -1.2         3.          2.96394861  2.1598177 ]
timesteps:  92
action_pos:  -1.2
action_vel:  2.547741711139679
fx_average:  1.4445959572235234
fy_average:  0.46679579945809024
target_force_x:  2.0
targ

fx_average:  2.30778750503984
fy_average:  0.5914072236357869
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.30778750503984
force_error_y:  0.5914072236357869
sobolev_term:  0.31924358570487116
REWARD:  -1.218438314380498
--------
observation:  [-0.98883671  2.          0.         -1.          0.3586061  -1.2
 -0.518887   -1.2         3.          3.          2.3586061 ]
timesteps:  104
action_pos:  -1.1113646507263184
action_vel:  2.3483219742774963
fx_average:  2.1117832487668213
fy_average:  0.7038494640419343
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.11178324876682133
force_error_y:  0.7038494640419343
sobolev_term:  0.33199568013758946
REWARD:  -1.147628392946345
--------
observation:  [-1.15931157  2.          0.         -0.92613721  0.34832197 -0.518887
 -1.2        -1.11136465  3.          2.3586061   2.34832197]
timesteps:  105
action_pos:  -1.2
action_vel:  1.7235112190246582
fx_average:  1.7629169643753555
fy_average:  0.6349442915457623
target_

fx_average:  1.6687268292047228
fy_average:  0.18964928273761628
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3312731707952772
force_error_y:  0.18964928273761628
sobolev_term:  0.336776355376209
REWARD:  -0.8576988089091024
--------
observation:  [-1.54917058  2.          0.         -1.         -0.90711677 -1.2
 -1.2        -1.2         3.          3.          1.09288323]
timesteps:  119
action_pos:  -1.2
action_vel:  2.144422709941864
fx_average:  1.1577360008744302
fy_average:  0.39436810729851035
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8422639991255698
force_error_y:  0.39436810729851035
sobolev_term:  0.29996198959227954
REWARD:  -1.5365940960163598
--------
observation:  [-1.62658236  2.          0.         -1.          0.14442271 -1.2
 -1.2        -1.2         3.          1.09288323  2.14442271]
timesteps:  120
action_pos:  -1.2
action_vel:  1.8236311674118042
fx_average:  1.8048885807909012
fy_average:  0.2346341060716993
target_force_x:  2.

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.3696662854351227
fy_average:  0.070968299743657
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6303337145648773
force_error_y:  0.070968299743657
sobolev_term:  0.14820329074174896
REWARD:  -0.8495053050502833
--------
observation:  [-1.23442101  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.85193092  2.21364969  3.        ]
timesteps:  133
action_pos:  -1.2
action_vel:  2.531581997871399
fx_average:  1.298556241105096
fy_average:  0.09032463573706434
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7014437588949041
force_error_y:  0.09032463573706434
sobolev_term:  0.15113159184538813
REWARD:  -0.9428999864773566
--------
observation:  [-1.2244112   2.          0.         -1.          0.531582   -1.2
 -1.2        -1.2         2.21364969  3.          2.531582  ]
timesteps:  134
action_pos:  -1.2
action_vel:  2.0228097438812256
fx_average:  1.0813365761380158
fy_average:  0.0909

fx_average:  1.962532433076041
fy_average:  0.19908216681846785
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.03746756692395903
force_error_y:  0.19908216681846785
sobolev_term:  0.26396367510936136
REWARD:  -0.5005134088517882
--------
observation:  [-1.04497544  2.          0.         -0.83515501  1.         -1.2
 -1.2        -1.00218601  3.          3.          3.        ]
timesteps:  148
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5030392818907148
fy_average:  0.28172233829893495
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4969607181092852
force_error_y:  0.28172233829893495
sobolev_term:  0.26554317771724595
REWARD:  -1.044226234125466
--------
observation:  [-1.1638337   2.          0.         -1.          1.         -1.2
 -1.00218601 -1.2         3.          3.          3.        ]
timesteps:  149
action_pos:  -1.2
action_vel:  2.328374743461609
fx_average:  1.3843416251044176
fy_average:  0.25797318933492286
target_force_x:  2.0
target_forc

fx_average:  2.0525912947470952
fy_average:  0.008349810992477771
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.05259129474709523
force_error_y:  0.008349810992477771
sobolev_term:  0.26751752023437636
REWARD:  -0.32845862597394937
--------
observation:  [-0.74488438  2.          0.         -0.3396939   1.         -1.2
 -1.2        -0.40763268  3.          2.11173171  3.        ]
timesteps:  163
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.9377895894563404
fy_average:  0.17491974857399348
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.06221041054365961
force_error_y:  0.17491974857399348
sobolev_term:  0.33136554984387956
REWARD:  -0.5684957089615327
--------
observation:  [-0.84453695  2.          0.         -1.          1.         -1.2
 -0.40763268 -1.2         2.11173171  3.          3.        ]
timesteps:  164
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_rew_

fx_average:  0.9334781728182677
fy_average:  0.544420695685037
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0665218271817323
force_error_y:  0.544420695685037
sobolev_term:  0.30732508087195765
REWARD:  -1.918267603738727
--------
observation:  [-1.07248307  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.70253044  1.          3.        ]
timesteps:  176
action_pos:  -1.2
action_vel:  2.3182201981544495
fx_average:  0.930339769767725
fy_average:  0.9678350827622512
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0696602302322749
force_error_y:  0.9678350827622512
sobolev_term:  0.36168294238621246
REWARD:  -2.399178255380739
--------
observation:  [-1.28420087  2.          0.         -1.          0.3182202  -1.2
 -1.2        -1.2         1.          3.          2.3182202 ]
timesteps:  177
action_pos:  -1.2
action_vel:  2.2885767221450806
fx_average:  1.3397021417260786
fy_average:  0.7166524989346778
target_force_x:  2.0
targ

fx_average:  1.5274322338994086
fy_average:  0.018787351163617523
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.47256776610059137
force_error_y:  0.018787351163617523
sobolev_term:  0.14800267272086068
REWARD:  -0.6393577899850695
--------
observation:  [-1.22810798  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.95133257  3.        ]
timesteps:  191
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.2295673257074661
fy_average:  0.06126826562761
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7704326742925338
force_error_y:  0.06126826562761
sobolev_term:  0.08500464765167302
REWARD:  -1.9167055875718169
--------
observation:  [-1.22274186  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.95133257  3.          3.        ]
timesteps:  192
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4444536697491497
fy_average:  -0.0399692185394041
target_force_x:  2.0
target_force_y:  0.0
force_

fx_average:  1.3836053369629653
fy_average:  0.3315577338781512
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6163946630370347
force_error_y:  0.3315577338781512
sobolev_term:  0.2347395432357969
REWARD:  -1.1826919401509828
--------
observation:  [-1.02544036  2.          0.         -1.         -0.47575641 -1.2
 -1.2        -1.2         1.96315002  1.          1.52424359]
timesteps:  204
action_pos:  -1.2
action_vel:  2.3205472826957703
fx_average:  1.2138537992485559
fy_average:  0.4243960173374083
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7861462007514441
force_error_y:  0.4243960173374083
sobolev_term:  0.23812738966549235
REWARD:  -1.4486696077543448
--------
observation:  [-1.11721227  2.          0.         -1.          0.32054728 -1.2
 -1.2        -1.2         1.          1.52424359  2.32054728]
timesteps:  205
action_pos:  -1.1942358970642089
action_vel:  3.0
fx_average:  0.9320543695585677
fy_average:  0.34069203983187774
target_force_x:  2.0

fx_average:  2.298422544982926
fy_average:  -0.049386639908995944
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.29842254498292586
force_error_y:  -0.049386639908995944
sobolev_term:  0.2685070345145424
REWARD:  -0.6163162194064642
--------
observation:  [-0.7901386   2.          0.         -0.18154263  0.92541355 -1.2
 -1.2        -0.21785116  3.          3.          2.92541355]
timesteps:  219
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.2642254006857185
fy_average:  -0.22622621107359042
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.26422540068571854
force_error_y:  -0.22622621107359042
sobolev_term:  0.3024226493836543
REWARD:  -0.7928742611429633
--------
observation:  [-0.1061051   2.          0.         -1.          1.         -1.2
 -0.21785116 -1.2         3.          2.92541355  3.        ]
timesteps:  220
action_pos:  -1.2
action_vel:  2.1418336629867554
fx_average:  1.378857744426335
fy_average:  0.6979815039156307
target_force_x:  2.0
target_f

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8546082186508488
fy_average:  0.010227941462051172
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1453917813491512
force_error_y:  0.010227941462051172
sobolev_term:  0.19198832311365718
REWARD:  -1.3476080459248596
--------
observation:  [-1.22249523  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.9545399   3.          3.        ]
timesteps:  232
action_pos:  -1.2
action_vel:  2.1121349334716797
fx_average:  0.7854002943753993
fy_average:  0.23135327286172283
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2145997056246007
force_error_y:  0.23135327286172283
sobolev_term:  0.19791435629498108
REWARD:  -1.6438673347813046
--------
observation:  [-1.51253986  2.          0.         -1.          0.11213493 -1.2
 -1.2        -1.2         3.          3.          2.11213493]
timesteps:  233
action_pos:  0.2997429370880127
action_vel:  3.0
fx_average:  2.5589853578948154
fy_average: 

fx_average:  1.0777963718013053
fy_average:  0.24326681547681148
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9222036281986947
force_error_y:  0.24326681547681148
sobolev_term:  0.18078703390326656
REWARD:  -1.3462574775787726
--------
observation:  [-1.54502822  2.          0.         -1.          0.03829861 -1.2
 -1.2        -1.2         3.          3.          2.03829861]
timesteps:  247
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6350065708877943
fy_average:  0.18255599464995073
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.36499342911220567
force_error_y:  0.18255599464995073
sobolev_term:  0.18714695092340863
REWARD:  -0.734696374685565
--------
observation:  [-1.18852076  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.03829861  3.        ]
timesteps:  248
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.3814711812522398
fy_average:  0.24423981839931458
target_force_x:  2.0
target_force_y:  0.0
for

action_pos:  -1.2
action_vel:  2.0376956462860107
fx_average:  1.3488980219692162
fy_average:  0.3584102812528708
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6511019780307838
force_error_y:  0.3584102812528708
sobolev_term:  0.3002795638012065
REWARD:  -1.309791823084861
--------
observation:  [-1.54523358  2.          0.         -1.          0.03769565 -1.2
 -1.2        -1.2         3.          3.          2.03769565]
timesteps:  11
action_pos:  -1.2
action_vel:  2.6186081171035767
fx_average:  0.8906055853303133
fy_average:  0.6124229163674068
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1093944146696866
force_error_y:  0.6124229163674068
sobolev_term:  0.26148412216295475
REWARD:  -1.9833014532000481
--------
observation:  [-1.45067337  2.          0.         -1.          0.61860812 -1.2
 -1.2        -1.2         3.          2.03769565  2.61860812]
timesteps:  12
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8672021303902504
fy_average:  0.633289

fx_average:  0.2821263818185028
fy_average:  0.3823342916488728
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.717873618181497
force_error_y:  0.3823342916488728
sobolev_term:  0.25425717052720526
REWARD:  -2.354465080357575
--------
observation:  [-1.18581105  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.42582923  3.        ]
timesteps:  26
action_pos:  -1.2
action_vel:  3.0
fx_average:  -0.4062515559267759
fy_average:  0.3911029318655349
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -2.406251555926776
force_error_y:  0.3911029318655349
sobolev_term:  0.22096165416849586
REWARD:  -3.0183161419608067
--------
observation:  [-1.35362908  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.42582923  3.          3.        ]
timesteps:  27
action_pos:  -1.2
action_vel:  3.0
fx_average:  -0.17702520502715854
fy_average:  0.3321922752319684
target_force_x:  2.0
target_force_y:  0.0
force_erro

fx_average:  1.3575160236979449
fy_average:  0.23302780721511168
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6424839763020551
force_error_y:  0.23302780721511168
sobolev_term:  0.23689259457125872
REWARD:  -1.1124043780884256
--------
observation:  [-1.30294256  2.          0.         -1.          0.07752323 -1.2
 -0.92254672 -1.2         2.27445221  3.          2.07752323]
timesteps:  41
action_pos:  -0.6174286365509033
action_vel:  3.0
fx_average:  2.4759914061463397
fy_average:  0.13274209381491653
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4759914061463397
force_error_y:  0.13274209381491653
sobolev_term:  0.3297480881014548
REWARD:  -0.9384815880627111
--------
observation:  [-0.83263027  2.          0.         -0.51452386  1.         -0.92254672
 -1.2        -0.61742864  3.          2.07752323  3.        ]
timesteps:  42
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|

fx_average:  1.08411055314701
fy_average:  0.49657992983810234
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9158894468529899
force_error_y:  0.49657992983810234
sobolev_term:  0.30747643703252614
REWARD:  -1.7199458137236183
--------
observation:  [-1.47335676  2.          0.         -1.         -0.03800905 -1.2
 -1.2        -1.2         3.          3.          1.96199095]
timesteps:  54
action_pos:  -0.8519715785980224
action_vel:  1.9640848636627197
fx_average:  2.273648049377003
fy_average:  0.665549311082731
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.27364804937700304
force_error_y:  0.665549311082731
sobolev_term:  0.3591061069252996
REWARD:  -1.2983034673850335
--------
observation:  [-1.20723186  2.          0.         -0.70997632 -0.03591514 -1.2
 -1.2        -0.85197158  3.          1.96199095  1.96408486]
timesteps:  55
action_pos:  0.3082284450531006
action_vel:  1.9032772779464722
fx_average:  2.6071822467167634
fy_average:  0.15325461446693

fx_average:  1.2085406380768928
fy_average:  0.12786403980785002
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7914593619231072
force_error_y:  0.12786403980785002
sobolev_term:  0.11304847217827382
REWARD:  -1.032371873909231
--------
observation:  [-1.31185982  2.          0.         -1.          0.86993444 -1.2
 -1.2        -1.2         3.          2.87534726  2.86993444]
timesteps:  69
action_pos:  -1.2
action_vel:  1.6231176853179932
fx_average:  1.1988693966631965
fy_average:  0.1326634598135999
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8011306033368035
force_error_y:  0.1326634598135999
sobolev_term:  0.16648093522548799
REWARD:  -1.1002749983758913
--------
observation:  [-1.50897932  2.          0.         -1.         -0.37688231 -1.2
 -1.2        -1.2         2.87534726  2.86993444  1.62311769]
timesteps:  70
action_pos:  -1.2
action_vel:  1.0
fx_average:  1.8928578613257556
fy_average:  0.11114519520465933
target_force_x:  2.0
target_force_y

fx_average:  1.4239457864409253
fy_average:  0.9178909830474489
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5760542135590747
force_error_y:  0.9178909830474489
sobolev_term:  0.4043865485003594
REWARD:  -1.898331745106883
--------
observation:  [-1.22794274  2.          0.         -1.          0.79080391 -1.2
 -1.2        -1.2         3.          1.35810435  2.79080391]
timesteps:  82
action_pos:  -1.2
action_vel:  2.978598475456238
fx_average:  1.3940048734516417
fy_average:  0.8828608071394676
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6059951265483583
force_error_y:  0.8828608071394676
sobolev_term:  0.37753834707275397
REWARD:  -1.8663942807605798
--------
observation:  [-1.40643485  2.          0.         -1.          0.97859848 -1.2
 -1.2        -1.2         1.35810435  2.79080391  2.97859848]
timesteps:  83
action_pos:  -0.7993057250976562
action_vel:  2.8289341926574707
fx_average:  2.3344650432450327
fy_average:  0.21391666135595008
target_fo

fx_average:  1.7218270430700853
fy_average:  0.1705494281838627
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2781729569299147
force_error_y:  0.1705494281838627
sobolev_term:  0.21573871622302113
REWARD:  -0.6644611013367986
--------
observation:  [-1.4055219   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.53762972  3.          3.        ]
timesteps:  97
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.520775854617306
fy_average:  0.21006961982182842
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.479224145382694
force_error_y:  0.21006961982182842
sobolev_term:  0.2241461757717635
REWARD:  -0.9134399409762859
--------
observation:  [-1.29986757  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  98
action_pos:  -1.2
action_vel:  2.630357265472412
fx_average:  1.4082005943195868
fy_average:  0.24005994969541816
target_force_x:  2.0
target_force_y:  

fx_average:  0.7176719298721153
fy_average:  0.8615194671578573
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2823280701278847
force_error_y:  0.8615194671578573
sobolev_term:  0.30525679888054247
REWARD:  -2.4491043361662848
--------
observation:  [-1.13679693  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.72503483  2.68049842  3.        ]
timesteps:  110
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9400508477056565
fy_average:  0.6953576071070748
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0599491522943434
force_error_y:  0.6953576071070748
sobolev_term:  0.28305864114946466
REWARD:  -2.038365400550883
--------
observation:  [-1.2669992   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.68049842  3.          3.        ]
timesteps:  111
action_pos:  -1.2
action_vel:  2.8557867407798767
fx_average:  0.24505460914564234
fy_average:  0.8952792849283577
target_force_x:  2.0
target_force_y

action_pos:  -0.7672166347503662
action_vel:  2.923867106437683
fx_average:  2.1030305729194145
fy_average:  0.09081772441834798
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.10303057291941453
force_error_y:  0.09081772441834798
sobolev_term:  0.2658326747324506
REWARD:  -0.4596809720702131
--------
observation:  [-1.03775355  2.          0.         -0.6393472   0.92386711 -1.2
 -1.2        -0.76721663  3.          3.          2.92386711]
timesteps:  125
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4086541729914883
fy_average:  0.1326295033695622
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5913458270085117
force_error_y:  0.1326295033695622
sobolev_term:  0.20457037321069657
REWARD:  -0.9285457035887704
--------
observation:  [-1.116586    2.          0.         -1.          1.         -1.2
 -0.76721663 -1.2         3.          2.92386711  3.        ]
timesteps:  126
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1705576419009789
fy_average:  0.04

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0180080117846941
fy_average:  0.40407496121297326
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9819919882153059
force_error_y:  0.40407496121297326
sobolev_term:  0.2713126283427487
REWARD:  -1.6573795777710278
--------
observation:  [-1.02218279  2.          0.         -1.          1.         -0.46421342
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  139
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9800530400796436
fy_average:  0.4001163720849601
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0199469599203566
force_error_y:  0.4001163720849601
sobolev_term:  0.27011878874535417
REWARD:  -1.6901821207506709
--------
observation:  [-1.1131528  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  140
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0557799854356562
fy_average:  0.3874861204238094
target_force_x:  

fx_average:  1.7321986664895215
fy_average:  1.281312616301525
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2678013335104785
force_error_y:  1.281312616301525
sobolev_term:  0.5737780490694512
REWARD:  -2.1228919988814545
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  154
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9690261731997118
fy_average:  1.1788709387959622
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0309738268002882
force_error_y:  1.1788709387959622
sobolev_term:  0.47832578876301507
REWARD:  -2.6881705543592656
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  155
action_pos:  -0.7643769979476929
action_vel:  3.0
fx_average:  1.642204396872182
fy_average:  0.5862718779159874
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -

fx_average:  1.5881264840663574
fy_average:  0.1905577939250447
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.41187351593364263
force_error_y:  0.1905577939250447
sobolev_term:  0.16600002239945086
REWARD:  -0.7684313322581382
--------
observation:  [-1.2539551   2.          0.         -1.          1.         -0.96631322
 -1.2        -1.2         3.          2.25573361  3.        ]
timesteps:  169
action_pos:  -1.2
action_vel:  2.9585509300231934
fx_average:  0.905539014710408
fy_average:  0.25530774352347674
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.094460985289592
force_error_y:  0.25530774352347674
sobolev_term:  0.15281853720122696
REWARD:  -1.5025872660142956
--------
observation:  [-1.3765284   2.          0.         -1.          0.95855093 -1.2
 -1.2        -1.2         2.25573361  3.          2.95855093]
timesteps:  170
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |


fx_average:  2.5252887341285306
fy_average:  0.6633829634779054
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5252887341285306
force_error_y:  0.6633829634779054
sobolev_term:  0.5165207948219835
REWARD:  -1.7051924924284196
--------
observation:  [-0.47367035  2.          0.          0.02304351  0.55679458 -1.2
 -1.2         0.02765222  3.          3.          2.55679458]
timesteps:  182
action_pos:  -0.7510647296905517
action_vel:  2.206939220428467
fx_average:  2.2740328932473224
fy_average:  1.1733562577321528
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2740328932473224
force_error_y:  1.1733562577321528
sobolev_term:  0.5128667106516442
REWARD:  -1.9602558616311194
--------
observation:  [-0.56121517  2.          0.         -0.62588727  0.20693922 -1.2
  0.02765222 -0.75106473  3.          2.55679458  2.20693922]
timesteps:  183
action_pos:  -1.2
action_vel:  2.6260247230529785
fx_average:  2.020622544971331
fy_average:  1.0236634455978098
target_forc

fx_average:  1.2206741686827698
fy_average:  0.0045264249307491395
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7793258313172302
force_error_y:  0.0045264249307491395
sobolev_term:  0.09448733058227407
REWARD:  -0.8783395868302535
--------
observation:  [-1.23365383  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.89912164  3.          3.        ]
timesteps:  197
action_pos:  -1.0761148452758789
action_vel:  3.0
fx_average:  1.255688388186132
fy_average:  0.025847635628366183
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.744311611813868
force_error_y:  0.025847635628366183
sobolev_term:  0.09813196556229013
REWARD:  -0.8682912130045243
--------
observation:  [-1.10368221  2.          0.         -0.89676237  1.         -1.2
 -1.2        -1.07611485  3.          3.          3.        ]
timesteps:  198
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8850152499572573
fy_average:  0.029483811096176846
target_force_x:  2.0
targ

fx_average:  1.0762372155692859
fy_average:  0.05865685302649515
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9237627844307141
force_error_y:  0.05865685302649515
sobolev_term:  0.09730739002080792
REWARD:  -1.0797270274780173
--------
observation:  [-1.45170775  2.          0.         -1.          0.15407836 -1.2
 -1.2        -1.2         3.          2.57125139  2.15407836]
timesteps:  210
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.2955121430099845
fy_average:  0.07251033194102517
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7044878569900155
force_error_y:  0.07251033194102517
sobolev_term:  0.10863246799864458
REWARD:  -0.8856306569296852
--------
observation:  [-1.20738231  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.57125139  2.15407836  3.        ]
timesteps:  211
action_pos:  -1.2
action_vel:  2.9717880487442017
fx_average:  1.2152206345031367
fy_average:  0.13605370989937085
target_force_x:  2.0
target_fo

fx_average:  0.9574986117238209
fy_average:  -0.07809694838838335
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.042501388276179
force_error_y:  -0.07809694838838335
sobolev_term:  0.08992819309872546
REWARD:  -1.210526529763288
--------
observation:  [-1.19559564  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.61330706  3.        ]
timesteps:  225
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.7619923555199983
fy_average:  -0.045506227128060385
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2380076444800017
force_error_y:  -0.045506227128060385
sobolev_term:  0.08873372600171005
REWARD:  -1.372247597609772
--------
observation:  [-1.28302031  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.61330706  3.          3.        ]
timesteps:  226
action_pos:  -1.2
action_vel:  2.4569507837295532
fx_average:  0.8083228842084514
fy_average:  -0.03758835863941132
target_force_x:  2.0
targe

fx_average:  1.550637004044301
fy_average:  0.2852726623330021
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.44936299595569906
force_error_y:  0.2852726623330021
sobolev_term:  0.1749316276900125
REWARD:  -0.9095672859787136
--------
observation:  [-1.22867022  2.          0.         -1.          1.         -1.2
 -1.03640671 -1.2         2.16065621  2.39958906  3.        ]
timesteps:  238
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.684657230006112
fy_average:  0.21334330312213454
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.315342769993888
force_error_y:  0.21334330312213454
sobolev_term:  0.17404495367334905
REWARD:  -0.7027310267893716
--------
observation:  [-1.35939766  2.          0.         -1.          1.         -1.03640671
 -1.2        -1.2         2.39958906  3.          3.        ]
timesteps:  239
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6112161345587637
fy_average:  0.219533054198774
target_force_x:  2.0
target_force_y:  0.0
for

fx_average:  0.5325419898811106
fy_average:  0.21136842754742058
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4674580101188894
force_error_y:  0.21136842754742058
sobolev_term:  0.18682704445233564
REWARD:  -1.8656534821186457
--------
observation:  [-1.39585982  2.          0.         -1.          1.         -0.58708363
 -1.2        -1.2         1.47347879  2.80882657  3.        ]
timesteps:  3
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.894123551117971
fy_average:  0.32423036690096374
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.105876448882029
force_error_y:  0.32423036690096374
sobolev_term:  0.22178511388191796
REWARD:  -1.6518919296649106
--------
observation:  [-1.15322083  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.80882657  3.          3.        ]
timesteps:  4
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8619891332031443
fy_average:  0.3725002698761601
target_force_x:  2.0
target_force_y:  0.0
for

action_pos:  -1.2
action_vel:  1.955484390258789
fx_average:  1.1709647660959608
fy_average:  0.11345347564384331
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8290352339040392
force_error_y:  0.11345347564384331
sobolev_term:  0.14701569374380605
REWARD:  -1.0895044032916885
--------
observation:  [-1.51632141  2.          0.         -1.         -0.04451561 -1.2
 -1.2        -1.2         3.          2.67892683  1.95548439]
timesteps:  17
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.546730901075834
fy_average:  0.11693561669880867
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4532690989241659
force_error_y:  0.11693561669880867
sobolev_term:  0.15424056137405356
REWARD:  -0.7244452769970282
--------
observation:  [-1.00525448  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.67892683  1.95548439  3.        ]
timesteps:  18
action_pos:  -0.8418469190597534
action_vel:  1.6824443340301514
fx_average:  1.6718206646433402
fy

fx_average:  1.6262367809660065
fy_average:  0.8225554304558329
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.37376321903399345
force_error_y:  0.8225554304558329
sobolev_term:  0.38151640118256736
REWARD:  -1.5778350506723937
--------
observation:  [-0.98619611  2.          0.         -0.49847776  1.         -1.2
 -1.09818106 -0.59817331  2.3173486   3.          3.        ]
timesteps:  32
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4376217946160284
fy_average:  1.1751586801865286
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5623782053839717
force_error_y:  1.1751586801865286
sobolev_term:  0.34765928280609343
REWARD:  -3.0851961683765934
--------
observation:  [-0.76650631  2.          0.         -1.          1.         -1.09818106
 -0.59817331 -1.2         3.          3.          3.        ]
timesteps:  33
action_pos:  -0.7894778251647949
action_vel:  2.3282967805862427
fx_average:  0.7506878026874672
fy_average:  0.9787268785271052
target_force_x

fx_average:  1.27526566385985
fy_average:  0.6505113779821632
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7247343361401499
force_error_y:  0.6505113779821632
sobolev_term:  0.39459282092260706
REWARD:  -1.7698385350449202
--------
observation:  [-1.50307429  2.          0.         -1.         -0.57896864 -1.2
 -1.2        -1.2         3.          3.          1.42103136]
timesteps:  47
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.0709464876505193
fy_average:  0.6568332560883977
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.07094648765051925
force_error_y:  0.6568332560883977
sobolev_term:  0.41211245632839827
REWARD:  -1.1398922000673153
--------
observation:  [-1.53510719  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.42103136  3.        ]
timesteps:  48
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_rew_mean          |

fx_average:  2.4300880365385598
fy_average:  0.14980732265690005
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.43008803653855976
force_error_y:  0.14980732265690005
sobolev_term:  0.26967908576085764
REWARD:  -0.8495744449563175
--------
observation:  [-0.99182689  2.          0.         -0.61781353  0.90693867 -1.05127294
 -1.2        -0.74137623  3.          3.          2.90693867]
timesteps:  60
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6958678573449912
fy_average:  0.2274704442666069
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.30413214265500876
force_error_y:  0.2274704442666069
sobolev_term:  0.22687338787329156
REWARD:  -0.7584759747949072
--------
observation:  [-0.97844755  2.          0.         -1.          1.         -1.2
 -0.74137623 -1.2         3.          2.90693867  3.        ]
timesteps:  61
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.423075816351503
fy_average:  0.07104880187457271
target_force_x:  2.0
target_force_y:  0.0


action_pos:  -1.2
action_vel:  2.5687126517295837
fx_average:  0.9509402693515306
fy_average:  -0.03271025211464518
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0490597306484695
force_error_y:  -0.03271025211464518
sobolev_term:  0.2884128770079029
REWARD:  -1.3701828597710175
--------
observation:  [-1.01496638  2.          0.         -1.          0.56871265 -0.59243202
 -1.2        -1.2         2.19301772  3.          2.56871265]
timesteps:  75
action_pos:  -0.43484773635864254
action_vel:  3.0
fx_average:  1.5159053582397612
fy_average:  -0.11793070820543783
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.48409464176023875
force_error_y:  -0.11793070820543783
sobolev_term:  0.3576239637706998
REWARD:  -0.9596493137363764
--------
observation:  [-0.61091971  2.          0.         -0.36237311  1.         -1.2
 -1.2        -0.43484774  3.          2.56871265  3.        ]
timesteps:  76
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0645101163795458
fy_a

fx_average:  1.3899695132684302
fy_average:  1.9795027060848955
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6100304867315698
force_error_y:  1.9795027060848955
sobolev_term:  0.4210848547083035
REWARD:  -3.010618047524769
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  88
action_pos:  -1.1059586763381957
action_vel:  3.0
fx_average:  0.8484898047703041
fy_average:  0.3371832197855297
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.151510195229696
force_error_y:  0.3371832197855297
sobolev_term:  0.1651901651510961
REWARD:  -1.6538835801663219
--------
observation:  [-1.14630337  2.          0.         -0.92163223  1.         -1.2
 -1.2        -1.10595868  3.          3.          3.        ]
timesteps:  89
action_pos:  -1.0454771518707275
action_vel:  3.0
fx_average:  1.191933273979764
fy_average:  0.003735785968746314
target_force_x:  2.0
target_force_y

fx_average:  2.0005428549390936
fy_average:  -0.05203284173481312
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.0005428549390935622
force_error_y:  -0.05203284173481312
sobolev_term:  0.45973632059249603
REWARD:  -0.5123120172664027
--------
observation:  [ 0.3568368   2.          0.         -1.          1.         -1.2
 -0.21237302 -1.2         3.          3.          3.        ]
timesteps:  103
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.527121402233577
fy_average:  1.1508846993431057
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5271214022335768
force_error_y:  1.1508846993431057
sobolev_term:  0.5629700004667394
REWARD:  -2.240976102043422
--------
observation:  [-0.95209899  2.          0.         -1.          1.         -0.21237302
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  104
action_pos:  -0.6233567476272582
action_vel:  3.0
fx_average:  2.3971974139608143
fy_average:  0.9993299772058286
target_force_x:  2.0
targe

fx_average:  2.3995503903101234
fy_average:  0.9717564480950602
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3995503903101234
force_error_y:  0.9717564480950602
sobolev_term:  0.607656826276112
REWARD:  -1.9789636646812956
--------
observation:  [-0.46501313  2.          0.         -0.30817032  1.          0.45047979
 -1.2        -0.36980438  1.96382761  1.          3.        ]
timesteps:  116
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.4109161061151516
fy_average:  0.7102978985503454
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4109161061151516
force_error_y:  0.7102978985503454
sobolev_term:  0.5873682189888008
REWARD:  -1.7085822236542978
--------
observation:  [-0.96744161  2.          0.         -1.          1.         -1.2
 -0.36980438 -1.2         1.          3.          3.        ]
timesteps:  117
action_pos:  -0.19814229011535645
action_vel:  1.9878177642822266
fx_average:  3.78701848575818
fy_average:  -0.012098521776870541
target_force_x: 

fx_average:  0.43575548670204456
fy_average:  1.0031937418759997
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5642445132979554
force_error_y:  1.0031937418759997
sobolev_term:  0.24317764129525965
REWARD:  -2.8106158964692147
--------
observation:  [-1.41729832  2.          0.         -1.          0.52506924 -1.2
 -1.2        -1.2         3.          2.69582045  2.52506924]
timesteps:  131
action_pos:  -0.13918719291687012
action_vel:  3.0
fx_average:  2.3198149847816274
fy_average:  -0.0690248233909504
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3198149847816274
force_error_y:  -0.0690248233909504
sobolev_term:  0.259138108075714
REWARD:  -0.6479779162482918
--------
observation:  [-0.7546979   2.          0.         -0.11598933  1.         -1.2
 -1.2        -0.13918719  2.69582045  2.52506924  3.        ]
timesteps:  132
action_pos:  -0.5058885812759399
action_vel:  3.0
fx_average:  2.5532886618219055
fy_average:  -0.2031901624394271
target_force_x:  2

action_pos:  -1.2
action_vel:  1.2741717100143433
fx_average:  1.7586793754328323
fy_average:  0.464688781848724
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.24132062456716774
force_error_y:  0.464688781848724
sobolev_term:  0.3992438313418661
REWARD:  -1.1052532377577577
--------
observation:  [-1.12845942  2.          0.         -1.         -0.72582829  0.61690092
 -1.2        -1.2         2.7225287   3.          1.27417171]
timesteps:  145
action_pos:  -1.2
action_vel:  1.3422775268554688
fx_average:  2.243122382304372
fy_average:  0.452488327538344
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.24312238230437222
force_error_y:  0.452488327538344
sobolev_term:  0.42893558991169045
REWARD:  -1.1245462997544067
--------
observation:  [-1.58325117  2.          0.         -1.         -0.65772247 -1.2
 -1.2        -1.2         3.          1.27417171  1.34227753]
timesteps:  146
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.7968729830912156
fy_average:  0.

fx_average:  2.0832590615528566
fy_average:  2.5336947104769236
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.08325906155285656
force_error_y:  2.5336947104769236
sobolev_term:  0.6803997743637575
REWARD:  -3.2973535463935377
--------
observation:  [-0.88195132  2.          0.         -1.          0.46490502 -0.08013697
 -1.14921355 -1.2         3.          2.76348627  2.46490502]
timesteps:  160
action_pos:  -1.2
action_vel:  2.9149723052978516
fx_average:  1.7412623861131071
fy_average:  2.2131806679857506
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.25873761388689287
force_error_y:  2.2131806679857506
sobolev_term:  0.6202671103537901
REWARD:  -3.0921853922264337
--------
observation:  [-1.0346996   2.          0.         -1.          0.91497231 -1.14921355
 -1.2        -1.2         2.76348627  2.46490502  2.91497231]
timesteps:  161
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.741022176362848
fy_average:  1.6710250451081103
target_force_x:  2.0
ta

fx_average:  3.3772088850204534
fy_average:  -0.8748218902447604
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3772088850204534
force_error_y:  -0.8748218902447604
sobolev_term:  0.5782250190927914
REWARD:  -2.8302557943580053
--------
observation:  [0.36803833 2.         0.         0.25592625 0.95144153 0.96954632
 0.50362215 0.3071115  3.         3.         2.95144153]
timesteps:  175
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5465978875376039
fy_average:  -0.35198644867563683
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.45340211246239615
force_error_y:  -0.35198644867563683
sobolev_term:  0.5349656873026694
REWARD:  -1.3403542484407023
--------
observation:  [ 0.35181272  2.          0.         -1.          1.          0.50362215
  0.3071115  -1.2         3.          2.95144153  3.        ]
timesteps:  176
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_rew_mean

fx_average:  3.403888954131276
fy_average:  0.21675656721411093
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.403888954131276
force_error_y:  0.21675656721411093
sobolev_term:  0.7822784294072284
REWARD:  -2.4029239507526152
--------
observation:  [-0.43828796  2.          0.         -0.95342088  1.         -1.2
  0.13262115 -1.14410505  3.          1.68860412  3.        ]
timesteps:  188
action_pos:  -0.9363391399383545
action_vel:  2.4698228240013123
fx_average:  2.515352572692006
fy_average:  1.2451935496644058
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5153525726920059
force_error_y:  1.2451935496644058
sobolev_term:  0.7436184498797045
REWARD:  -2.504164572236116
--------
observation:  [-0.91580331  2.          0.         -0.78028262  0.46982282  0.13262115
 -1.14410505 -0.93633914  1.68860412  3.          2.46982282]
timesteps:  189
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8848073673165653
fy_average:  1.4556763718140355
target_force_x:  2.

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.643269500830032
fy_average:  -0.27263646980946277
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.356730499169968
force_error_y:  -0.27263646980946277
sobolev_term:  0.15504877708525514
REWARD:  -0.7844157460646859
--------
observation:  [-0.69232162  2.          0.         -1.          1.         -0.62521863
 -0.78264806 -1.2         3.          3.          3.        ]
timesteps:  203
action_pos:  -0.4586129665374756
action_vel:  2.357996702194214
fx_average:  1.7160340472564268
fy_average:  -0.15811058217025878
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.28396595274357317
force_error_y:  -0.15811058217025878
sobolev_term:  0.2456822456139327
REWARD:  -0.6877587805277646
--------
observation:  [-0.46632689  2.          0.         -0.38217747  0.3579967  -0.78264806
 -1.2        -0.45861297  3.          3.          2.3579967 ]
timesteps:  204
action_pos:  -0.8919796228408813
action_vel:  1.7647984027862549


fx_average:  2.0548702029421064
fy_average:  -0.008961440764416065
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.05487020294210643
force_error_y:  -0.008961440764416065
sobolev_term:  0.3453946378000581
REWARD:  -0.4092262815065806
--------
observation:  [-1.09671955  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.87023067  3.        ]
timesteps:  216
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8057762219628208
fy_average:  0.05483679808742514
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.19422377803717916
force_error_y:  0.05483679808742514
sobolev_term:  0.3312196513871807
REWARD:  -0.580280227511785
--------
observation:  [-1.22088751  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.87023067  3.          3.        ]
timesteps:  217
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.3692480491772128
fy_average:  0.28424474840433195
target_force_x:  2.0
target_force_y:  0.0
f

fx_average:  1.735145796434144
fy_average:  -0.004588021830010511
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2648542035658561
force_error_y:  -0.004588021830010511
sobolev_term:  0.21394979795201705
REWARD:  -0.48339202334788367
--------
observation:  [-1.11350935  2.          0.         -1.          1.         -1.03889329
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  231
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4658770023615182
fy_average:  0.04094834238497998
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5341229976384818
force_error_y:  0.04094834238497998
sobolev_term:  0.18775907081416293
REWARD:  -0.7628304108376247
--------
observation:  [-1.16850943  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  232
action_pos:  -1.2
action_vel:  1.1562888622283936
fx_average:  1.109680519006768
fy_average:  0.13607773299566092
target_force_x:  2.0

fx_average:  2.8505592739342016
fy_average:  0.4028035527215008
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8505592739342016
force_error_y:  0.4028035527215008
sobolev_term:  0.4214463520935847
REWARD:  -1.6748091787492871
--------
observation:  [-0.51434592  2.          0.          0.09758842  1.         -1.2
 -1.2         0.1171061   3.          3.          3.        ]
timesteps:  244
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.268699237658272
fy_average:  0.5364381328930433
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.26869923765827197
force_error_y:  0.5364381328930433
sobolev_term:  0.43281302632186375
REWARD:  -1.237950396873179
--------
observation:  [ 0.44048562  2.          0.         -1.          1.         -1.2
  0.1171061  -1.2         3.          3.          3.        ]
timesteps:  245
action_pos:  -0.8940905570983887
action_vel:  3.0
fx_average:  2.4686544186466346
fy_average:  1.2158520872228236
target_force_x:  2.0
target_force_y:  0

fx_average:  0.836895659936682
fy_average:  0.14983394162805466
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.163104340063318
force_error_y:  0.14983394162805466
sobolev_term:  0.13675969949176875
REWARD:  -1.4496979811831416
--------
observation:  [-1.27251283  2.          0.         -1.          0.5522989  -1.2
 -0.86804602 -1.2         3.          3.          2.5522989 ]
timesteps:  9
action_pos:  -1.0413902521133422
action_vel:  1.8442591428756714
fx_average:  1.4560340005324843
fy_average:  0.1104425008818545
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5439659994675157
force_error_y:  0.1104425008818545
sobolev_term:  0.16206426673779384
REWARD:  -0.816472767087164
--------
observation:  [-1.32419463  2.          0.         -0.86782521 -0.15574086 -0.86804602
 -1.2        -1.04139025  3.          2.5522989   1.84425914]
timesteps:  10
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5233035017493615
fy_average:  0.12638931279374788
target_force_x: 

action_pos:  0.7508445739746094
action_vel:  3.0
fx_average:  2.5688297452815383
fy_average:  -0.5544017831618008
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5688297452815383
force_error_y:  -0.5544017831618008
sobolev_term:  0.3450968277432822
REWARD:  -1.4683283561866212
--------
observation:  [-0.1087415   2.          0.          0.62570381  1.         -1.2
 -1.2         0.75084457  2.57868469  3.          3.        ]
timesteps:  23
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.4002328853927204
fy_average:  -0.9934869791320711
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4002328853927204
force_error_y:  -0.9934869791320711
sobolev_term:  0.4162086647915219
REWARD:  -1.8099285293163134
--------
observation:  [ 0.70442931  2.          0.         -1.          1.         -1.2
  0.75084457 -1.2         3.          3.          3.        ]
timesteps:  24
action_pos:  -1.2
action_vel:  3.0
fx_average:  3.1162777574541014
fy_average:  0.41472861171785275
ta

action_pos:  -0.10420618057250976
action_vel:  3.0
fx_average:  2.24243078013907
fy_average:  -0.005473867878809291
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.24243078013906993
force_error_y:  -0.005473867878809291
sobolev_term:  0.410282323923815
REWARD:  -0.6581869719416942
--------
observation:  [-0.6138971   2.          0.         -0.08683848  1.         -0.91977718
 -1.2        -0.10420618  3.          3.          3.        ]
timesteps:  38
action_pos:  -0.4035581588745117
action_vel:  1.0448793172836304
fx_average:  1.1539017832847036
fy_average:  0.19957834654149784
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8460982167152964
force_error_y:  0.19957834654149784
sobolev_term:  0.389341126565711
REWARD:  -1.435017689822505
--------
observation:  [-0.8570113   2.          0.         -0.33629847 -0.95512068 -1.2
 -0.10420618 -0.40355816  3.          3.          1.04487932]
timesteps:  39
action_pos:  -1.2
action_vel:  1.349094271659851
fx_average:  

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9240374283257976
fy_average:  0.8240089478218363
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0759625716742023
force_error_y:  0.8240089478218363
sobolev_term:  0.3503385336782463
REWARD:  -2.250310053174285
--------
observation:  [-1.109285  2.        0.       -1.        1.       -1.2      -1.2
 -1.2       3.        3.        3.      ]
timesteps:  53
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1165194994746757
fy_average:  0.610261850542652
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8834805005253243
force_error_y:  0.610261850542652
sobolev_term:  0.32598396383836875
REWARD:  -1.8197263149063452
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  54
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_rew_mean     

fx_average:  1.3741217306959272
fy_average:  0.3485360684349283
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6258782693040728
force_error_y:  0.3485360684349283
sobolev_term:  0.22034307850264623
REWARD:  -1.1947574162416474
--------
observation:  [-1.22229284  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  66
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0981351611215533
fy_average:  0.2561585227540183
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9018648388784467
force_error_y:  0.2561585227540183
sobolev_term:  0.16896620042682864
REWARD:  -1.3269895620592935
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  67
action_pos:  -1.1381046295166015
action_vel:  1.7257003784179688
fx_average:  1.0115667490372
fy_average:  0.2642228481017172
target_force_x:  2.0
target_force_y

action_pos:  1.1678412437438965
action_vel:  2.48820561170578
fx_average:  2.0224282399583333
fy_average:  -0.5380934018934095
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.022428239958333318
force_error_y:  -0.5380934018934095
sobolev_term:  0.3816686575145377
REWARD:  -0.9421902993662805
--------
observation:  [ 0.11914198  2.          0.          0.97320104  0.48820561 -1.2
 -1.2         1.16784124  3.          2.77099562  2.48820561]
timesteps:  81
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.5069618163496226
fy_average:  -1.1568180676531536
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5069618163496226
force_error_y:  -1.1568180676531536
sobolev_term:  0.45550045575650977
REWARD:  -2.119280339759286
--------
observation:  [ 0.16643355  2.          0.         -1.          1.         -1.2
  1.16784124 -1.2         2.77099562  2.48820561  3.        ]
timesteps:  82
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.503400289045951
fy_average:  0.258974

fx_average:  0.5103465114807906
fy_average:  1.3132075830779153
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4896534885192094
force_error_y:  1.3132075830779153
sobolev_term:  0.33534641376701424
REWARD:  -3.138207485364139
--------
observation:  [-1.08150841  2.          0.         -1.          1.         -0.92442749
 -1.17314186 -1.2         3.          2.43582141  3.        ]
timesteps:  94
action_pos:  -1.2
action_vel:  1.7353787422180176
fx_average:  0.5570312207427484
fy_average:  1.4967618347314304
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4429687792572516
force_error_y:  1.4967618347314304
sobolev_term:  0.3276620247896355
REWARD:  -3.2673926387783174
--------
observation:  [-1.47661211  2.          0.         -1.         -0.26462126 -1.17314186
 -1.2        -1.2         2.43582141  3.          1.73537874]
timesteps:  95
action_pos:  -1.1868936538696289
action_vel:  2.7224572896957397
fx_average:  1.1328096544120354
fy_average:  1.148282409816

fx_average:  0.8173364313259398
fy_average:  -0.20612880532404798
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.18266356867406
force_error_y:  -0.20612880532404798
sobolev_term:  0.1914670069729182
REWARD:  -1.5802593809710264
--------
observation:  [-1.30050742  2.          0.         -1.          0.40766478 -1.00644071
 -1.2        -1.2         3.          3.          2.40766478]
timesteps:  109
action_pos:  -0.21112997531890867
action_vel:  2.9682048559188843
fx_average:  2.354363991106054
fy_average:  -0.5199939419937503
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3543639911060539
force_error_y:  -0.5199939419937503
sobolev_term:  0.3030710988623507
REWARD:  -1.1774290319621548
--------
observation:  [-0.67843349  2.          0.         -0.17594165  0.96820486 -1.2
 -1.2        -0.21112998  3.          2.40766478  2.96820486]
timesteps:  110
action_pos:  -1.2
action_vel:  2.7620315551757812
fx_average:  1.5900203046999979
fy_average:  -0.1606222561573

fx_average:  1.7899426883541987
fy_average:  -1.2323348358965147
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.21005731164580133
force_error_y:  -1.2323348358965147
sobolev_term:  0.5003017004736067
REWARD:  -1.9426938480159226
--------
observation:  [-0.67970753  2.          0.         -0.68928981 -0.44581652 -0.39615684
 -1.2        -0.82714777  1.75139439  2.51666933  1.55418348]
timesteps:  122
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.3451049637980015
fy_average:  0.7587018679587567
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6548950362019985
force_error_y:  0.7587018679587567
sobolev_term:  0.5177728111082953
REWARD:  -1.9313697152690505
--------
observation:  [-1.17581624  2.          0.         -1.          1.         -1.2
 -0.82714777 -1.2         2.51666933  1.55418348  3.        ]
timesteps:  123
action_pos:  -0.9675523281097411
action_vel:  3.0
fx_average:  2.1696306339314257
fy_average:  0.8122666879326385
target_force_x:  2.0
target

action_pos:  -0.07362127304077148
action_vel:  3.0
fx_average:  3.0093706047877107
fy_average:  -0.456106490575393
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.0093706047877107
force_error_y:  -0.456106490575393
sobolev_term:  0.3999492516716211
REWARD:  -1.8654263470347248
--------
observation:  [-0.52927381  2.          0.         -0.06135106  1.         -0.66113534
 -1.2        -0.07362127  3.          3.          3.        ]
timesteps:  137
action_pos:  1.2
action_vel:  3.0
fx_average:  2.1586279586102557
fy_average:  -2.107082139183994
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.1586279586102557
force_error_y:  -2.107082139183994
sobolev_term:  0.40954623105863436
REWARD:  -2.6752563288528837
--------
observation:  [ 0.74735161  2.          0.          1.          1.         -1.2
 -0.07362127  1.2         3.          3.          3.        ]
timesteps:  138
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.1620137045962498
fy_average:  -2.423573986110

action_pos:  -1.2
action_vel:  3.0
fx_average:  2.025294332184601
fy_average:  0.942167274669653
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.025294332184600865
force_error_y:  0.942167274669653
sobolev_term:  0.5000758250057102
REWARD:  -1.467537431859964
--------
observation:  [-0.99226279  2.          0.         -1.          1.          0.19538383
 -0.90124569 -1.2         2.44681686  3.          3.        ]
timesteps:  151
action_pos:  0.5022972106933593
action_vel:  3.0
fx_average:  3.081310556398378
fy_average:  -0.2339864664432947
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.0813105563983778
force_error_y:  -0.2339864664432947
sobolev_term:  0.5530276665571835
REWARD:  -1.868324689398856
--------
observation:  [ 0.12861592  2.          0.          0.41858101  1.         -0.90124569
 -1.2         0.50229721  3.          3.          3.        ]
timesteps:  152
action_pos:  -0.07272083759307861
action_vel:  1.0
fx_average:  2.990350782837028
fy_average

action_pos:  -0.8866290092468262
action_vel:  2.862868547439575
fx_average:  4.065505805935182
fy_average:  -0.25964632185567205
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  2.0655058059351816
force_error_y:  -0.25964632185567205
sobolev_term:  0.4115249848394947
REWARD:  -2.7366771126303484
--------
observation:  [-0.81958395  2.          0.         -0.73885751  0.86286855  1.2
 -1.2        -0.88662901  3.          2.01947284  2.86286855]
timesteps:  166
action_pos:  -1.0129297256469727
action_vel:  3.0
fx_average:  3.838822684469679
fy_average:  -0.3481950346391545
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.8388226844696791
force_error_y:  -0.3481950346391545
sobolev_term:  0.42371411918959373
REWARD:  -2.6107318382984275
--------
observation:  [-0.96186825  2.          0.         -0.8441081   1.         -1.2
 -0.88662901 -1.01292973  2.01947284  2.86286855  3.        ]
timesteps:  167
action_pos:  -0.3044883728027344
action_vel:  3.0
fx_average:  3.3330

fx_average:  1.8129283941694567
fy_average:  1.4568865034039005
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.18707160583054327
force_error_y:  1.4568865034039005
sobolev_term:  0.5579547470237796
REWARD:  -2.2019128562582235
--------
observation:  [-1.1000719   2.          0.         -0.82860649  0.6776886  -1.16324608
 -1.2        -0.99432778  3.          3.          2.6776886 ]
timesteps:  181
action_pos:  0.1837287425994873
action_vel:  2.167494058609009
fx_average:  1.9449457303458069
fy_average:  0.9197131071416929
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.05505426965419313
force_error_y:  0.9197131071416929
sobolev_term:  0.5280455648627893
REWARD:  -1.5028129416586753
--------
observation:  [-0.3889529   2.          0.          0.15310729  0.16749406 -1.2
 -0.99432778  0.18372874  3.          2.6776886   2.16749406]
timesteps:  182
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 

fx_average:  1.6114760763215508
fy_average:  0.45758368478118316
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.38852392367844923
force_error_y:  0.45758368478118316
sobolev_term:  0.5064831904173817
REWARD:  -1.352590798877014
--------
observation:  [-0.54624048  2.          0.         -0.40433508  1.         -1.2
 -0.33049328 -0.4852021   3.          2.77435571  3.        ]
timesteps:  194
action_pos:  -0.7130031108856201
action_vel:  2.5587021112442017
fx_average:  1.678276876096162
fy_average:  0.4352703172057763
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.32172312390383806
force_error_y:  0.4352703172057763
sobolev_term:  0.48711478611012726
REWARD:  -1.2441082272197417
--------
observation:  [-0.71043202  2.          0.         -0.59416926  0.55870211 -0.33049328
 -0.4852021  -0.71300311  2.77435571  3.          2.55870211]
timesteps:  195
action_pos:  -1.2
action_vel:  1.8438628911972046
fx_average:  1.0839954961579368
fy_average:  0.40979473139614

fx_average:  3.380070423705368
fy_average:  0.01256575185345176
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.380070423705368
force_error_y:  0.01256575185345176
sobolev_term:  0.5389706986636477
REWARD:  -1.9316068742224672
--------
observation:  [-0.18371214  2.          0.         -0.13555843  0.75103158  0.50718985
 -0.54187045 -0.16267011  1.12716007  1.98561144  2.75103158]
timesteps:  209
action_pos:  -1.2
action_vel:  2.8858381509780884
fx_average:  3.4346132363730626
fy_average:  0.2463760808648137
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4346132363730626
force_error_y:  0.2463760808648137
sobolev_term:  0.5553704686644407
REWARD:  -2.2363597859023168
--------
observation:  [-0.29312034  2.          0.         -1.          0.88583815 -0.54187045
 -0.16267011 -1.2         1.98561144  2.75103158  2.88583815]
timesteps:  210
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.89403472896219
fy_average:  1.1062844931331042
target_force_x:  2.0
target

fx_average:  2.1593478602691794
fy_average:  -0.450115852854153
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.15934786026917935
force_error_y:  -0.450115852854153
sobolev_term:  0.5498627050445938
REWARD:  -1.1593264181679261
--------
observation:  [-0.10114914  2.          0.          0.0846324  -0.01404345  0.56833384
 -1.2         0.10155888  2.49396032  2.6215111   1.98595655]
timesteps:  222
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.411231560699353
fy_average:  -0.8892474819733731
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.41123156069935307
force_error_y:  -0.8892474819733731
sobolev_term:  0.5349979478810206
REWARD:  -1.835476990553747
--------
observation:  [-0.35122018  2.          0.         -1.          1.         -1.2
  0.10155888 -1.2         2.6215111   1.98595655  3.        ]
timesteps:  223
action_pos:  -1.07702579498291
action_vel:  3.0
fx_average:  2.0450334744169862
fy_average:  -0.6457965613793513
target_force_x:  2.0
target_for

fx_average:  1.9114865452144019
fy_average:  0.979025216573318
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.08851345478559813
force_error_y:  0.979025216573318
sobolev_term:  0.521848191709433
REWARD:  -1.589386863068349
--------
observation:  [-1.37083658  2.          0.         -1.         -0.15762556 -1.2
 -1.2        -1.2         1.30542046  3.          1.84237444]
timesteps:  237
action_pos:  -0.6042662858963013
action_vel:  2.2061811685562134
fx_average:  2.8799785871166517
fy_average:  0.09531672067495922
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8799785871166517
force_error_y:  0.09531672067495922
sobolev_term:  0.3940354404850747
REWARD:  -1.3693307482766857
--------
observation:  [-0.86927614  2.          0.         -0.50355524  0.20618117 -1.2
 -1.2        -0.60426629  3.          1.84237444  2.20618117]
timesteps:  238
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.641074408182359
fy_average:  0.3302889212615433
target_force_x:  2.0
targ

fx_average:  2.7906468716913375
fy_average:  -0.11103295101580815
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7906468716913375
force_error_y:  -0.11103295101580815
sobolev_term:  0.4236223450472359
REWARD:  -1.3253021677543815
--------
observation:  [-0.47044991  2.          0.         -0.33729792  0.41496313 -1.2
 -1.2        -0.4047575   1.84844041  1.80533713  2.41496313]
timesteps:  250
-----------RESET!!!!!!!!!!!!!-----------
action_pos:  -0.42535886764526365
action_vel:  2.1144078373908997
fx_average:  0.6954948048892352
fy_average:  -0.33167709274441354
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3045051951107647
force_error_y:  -0.33167709274441354
sobolev_term:  0.12619975497779393
REWARD:  -1.7623820428329722
--------
observation:  [-0.23395544  2.          0.         -0.35446572  0.11440784 -1.2
 -0.4047575  -0.42535887  1.80533713  2.41496313  2.11440784]
timesteps:  1
action_pos:  1.2
action_vel:  2.4281150698661804
fx_average:  0.522248121

action_pos:  -1.1255550384521484
action_vel:  3.0
fx_average:  3.207857000629663
fy_average:  -1.3757934294677374
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.2078570006296632
force_error_y:  -1.3757934294677374
sobolev_term:  0.4742479691362377
REWARD:  -3.0578983992336384
--------
observation:  [ 0.5852445   2.          0.         -0.93796253  1.         -0.643606
  0.4548666  -1.12555504  2.74866074  3.          3.        ]
timesteps:  15
action_pos:  0.5735458374023438
action_vel:  3.0
fx_average:  3.880459396273853
fy_average:  -1.6824099832121393
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.880459396273853
force_error_y:  -1.6824099832121393
sobolev_term:  0.5885524914896437
REWARD:  -4.151421870975636
--------
observation:  [ 0.36175219  2.          0.          0.47795486  1.          0.4548666
 -1.12555504  0.57354584  3.          3.          3.        ]
timesteps:  16
action_pos:  -0.7384443283081055
action_vel:  3.0
fx_average:  3.208235529420461

action_pos:  -1.2
action_vel:  2.833909809589386
fx_average:  1.531754951088643
fy_average:  1.2317318715450059
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.468245048911357
force_error_y:  1.2317318715450059
sobolev_term:  0.49095349673760186
REWARD:  -2.1909304171939645
--------
observation:  [-1.23445309  2.          0.         -1.          0.83390981 -1.2
 -1.2        -1.2         3.          3.          2.83390981]
timesteps:  29
action_pos:  -1.2
action_vel:  2.9008066058158875
fx_average:  0.8312409413879556
fy_average:  1.4960972907661005
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1687590586120444
force_error_y:  1.4960972907661005
sobolev_term:  0.384431319370539
REWARD:  -3.049287668748684
--------
observation:  [-1.31226658  2.          0.         -1.          0.90080661 -1.2
 -1.2        -1.2         3.          2.83390981  2.90080661]
timesteps:  30
action_pos:  -0.7749477624893188
action_vel:  3.0
fx_average:  1.6436103201713408
fy_average

action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1187244300292094
fy_average:  0.18580140092030148
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8812755699707906
force_error_y:  0.18580140092030148
sobolev_term:  0.2438427842387528
REWARD:  -1.3109197551298448
--------
observation:  [-1.36076992  2.          0.         -1.          1.         -1.16716912
 -1.2        -1.2         1.78468299  2.62624401  3.        ]
timesteps:  44
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6068187156184992
fy_average:  0.07132079104065724
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3931812843815008
force_error_y:  0.07132079104065724
sobolev_term:  0.18966279971238675
REWARD:  -0.6541648751345448
--------
observation:  [-1.32917984  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.62624401  3.          3.        ]
timesteps:  45
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6600454991344438
fy_average:  0.15813674234698258
target

fx_average:  3.303002261986835
fy_average:  -0.05885941493393688
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.303002261986835
force_error_y:  -0.05885941493393688
sobolev_term:  0.326667320217861
REWARD:  -1.688528997138633
--------
observation:  [-0.41980033  2.          0.         -0.38674176  0.93942016 -0.57393923
 -0.34294088 -0.46409011  3.          2.39194232  2.93942016]
timesteps:  59
action_pos:  -1.2
action_vel:  1.4014151096343994
fx_average:  1.298671990873014
fy_average:  0.8407702867465097
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.701328009126986
force_error_y:  0.8407702867465097
sobolev_term:  0.4062715318376047
REWARD:  -1.9483698277111003
--------
observation:  [-0.68762984  2.          0.         -1.         -0.59858489 -0.34294088
 -0.46409011 -1.2         2.39194232  2.93942016  1.40141511]
timesteps:  60
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|

fx_average:  1.2855567787186417
fy_average:  0.3453743828802242
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7144432212813583
force_error_y:  0.3453743828802242
sobolev_term:  0.35665651238253687
REWARD:  -1.4164741165441195
--------
observation:  [-0.55453539  2.          0.         -0.46202111  0.6248095  -0.84824002
 -1.2        -0.55442533  3.          3.          2.6248095 ]
timesteps:  72
action_pos:  0.4003715515136719
action_vel:  2.6403815150260925
fx_average:  1.8227237230663165
fy_average:  -0.47475570575417425
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.17727627693368353
force_error_y:  -0.47475570575417425
sobolev_term:  0.4127464602413438
REWARD:  -1.0647784429292015
--------
observation:  [ 0.05281365  2.          0.          0.33364296  0.64038152 -1.2
 -0.55442533  0.40037155  3.          2.6248095   2.64038152]
timesteps:  73
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.6304511289938639
fy_average:  -0.24332137500433013
target_for

action_pos:  -0.545945119857788
action_vel:  3.0
fx_average:  2.9409307041911537
fy_average:  -0.5035776743653809
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9409307041911537
force_error_y:  -0.5035776743653809
sobolev_term:  0.23589728336718144
REWARD:  -1.680405661923716
--------
observation:  [-0.6955113   2.          0.         -0.45495427  1.         -1.16402507
 -0.30062127 -0.54594512  3.          3.          3.        ]
timesteps:  87
action_pos:  -0.19980225563049317
action_vel:  2.133333206176758
fx_average:  3.4170413779091335
fy_average:  -0.8351414783591926
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4170413779091335
force_error_y:  -0.8351414783591926
sobolev_term:  0.24771232158096104
REWARD:  -2.499895177849287
--------
observation:  [-0.32801814  2.          0.         -0.16650188  0.13333321 -0.30062127
 -0.54594512 -0.19980226  3.          3.          2.13333321]
timesteps:  88
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.68529216

fx_average:  0.03932342805377725
fy_average:  0.5956498960058432
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.9606765719462227
force_error_y:  0.5956498960058432
sobolev_term:  0.26473250037771245
REWARD:  -2.8210589683297784
--------
observation:  [-1.16092792  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.72725797  3.        ]
timesteps:  100
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.20479981359080396
fy_average:  0.2106826964415775
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.795200186409196
force_error_y:  0.2106826964415775
sobolev_term:  0.15517243252237106
REWARD:  -2.1610553153731447
--------
observation:  [-1.24557211  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.72725797  3.          3.        ]
timesteps:  101
action_pos:  -1.2
action_vel:  2.572335124015808
fx_average:  0.53310443578939
fy_average:  0.16578301930355815
target_force_x:  2.0
target_force_y:

fx_average:  2.3478230517561047
fy_average:  0.9355136047425778
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.34782305175610473
force_error_y:  0.9355136047425778
sobolev_term:  0.3949004758585355
REWARD:  -1.678237132357218
--------
observation:  [-0.94066176  2.          0.         -1.          1.         -0.1287931
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  115
action_pos:  -1.2
action_vel:  2.0763832330703735
fx_average:  1.9471640847959497
fy_average:  0.906950593405434
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.05283591520405029
force_error_y:  0.906950593405434
sobolev_term:  0.4046089666487381
REWARD:  -1.3643954752582224
--------
observation:  [-1.4119524   2.          0.         -1.          0.07638323 -1.2
 -1.2        -1.2         3.          3.          2.07638323]
timesteps:  116
action_pos:  -1.2
action_vel:  2.440737247467041
fx_average:  2.7811108161475255
fy_average:  0.905474695996884
target_force_x:  2.0

fx_average:  1.1833883865289325
fy_average:  0.19468900873155168
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8166116134710675
force_error_y:  0.19468900873155168
sobolev_term:  0.18696504978473955
REWARD:  -1.1982656719873586
--------
observation:  [-1.12852542  2.          0.         -1.          0.53410244 -1.2
 -1.2        -1.2         2.69104898  2.79122573  2.53410244]
timesteps:  128
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.7842117643815253
fy_average:  0.17948615504926993
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2157882356184748
force_error_y:  0.17948615504926993
sobolev_term:  0.15824293326094585
REWARD:  -1.5535173239286906
--------
observation:  [-1.12649288  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.79122573  2.53410244  3.        ]
timesteps:  129
action_pos:  0.03490347862243652
action_vel:  2.5611904859542847
fx_average:  2.1974889005355607
fy_average:  -0.38143189616237194
target_force_x

fx_average:  3.4429380185853415
fy_average:  -0.3563984718704921
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4429380185853415
force_error_y:  -0.3563984718704921
sobolev_term:  0.41332611990498097
REWARD:  -2.2126626103608147
--------
observation:  [-0.67378417  2.          0.         -0.44170731  1.         -0.85916405
 -1.2        -0.53004878  3.          1.52971017  3.        ]
timesteps:  143
action_pos:  -0.5701332092285156
action_vel:  3.0
fx_average:  3.116321277194507
fy_average:  -0.004427062583350126
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.116321277194507
force_error_y:  -0.004427062583350126
sobolev_term:  0.4079207659962084
REWARD:  -1.5286691057740653
--------
observation:  [-0.79857914  2.          0.         -0.47511101  1.         -1.2
 -0.53004878 -0.57013321  1.52971017  3.          3.        ]
timesteps:  144
action_pos:  -1.2
action_vel:  2.2376551628112793
fx_average:  2.9657638179387416
fy_average:  0.23183066625336285
target_fo

action_pos:  -1.2
action_vel:  2.0437909960746765
fx_average:  2.423988752405518
fy_average:  0.9682179517971896
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4239887524055179
force_error_y:  0.9682179517971896
sobolev_term:  0.5772035788500013
REWARD:  -1.9694102830527087
--------
observation:  [-1.08237952  2.          0.         -1.          0.043791   -0.53400764
 -0.6893368  -1.2         3.          3.          2.043791  ]
timesteps:  157
action_pos:  -0.21242015361785888
action_vel:  3.0
fx_average:  3.4614146949589832
fy_average:  1.3026615263344021
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4614146949589832
force_error_y:  1.3026615263344021
sobolev_term:  0.5035924792727968
REWARD:  -3.2676687005661824
--------
observation:  [-0.48618112  2.          0.         -0.17701679  1.         -0.6893368
 -1.2        -0.21242015  3.          2.043791    3.        ]
timesteps:  158
action_pos:  0.6339446067810058
action_vel:  3.0
fx_average:  2.21910417491

fx_average:  3.3980973717257137
fy_average:  0.5322463876865153
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3980973717257137
force_error_y:  0.5322463876865153
sobolev_term:  0.5797110072757632
REWARD:  -2.5100547666879924
--------
observation:  [-1.16038219  2.          0.         -0.91654754  0.63758457 -1.13748972
 -1.2        -1.09985704  1.          1.86668932  2.63758457]
timesteps:  172
action_pos:  -1.2
action_vel:  1.0
fx_average:  1.7436283407515316
fy_average:  1.4287591864049356
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2563716592484684
force_error_y:  1.4287591864049356
sobolev_term:  0.5573221745219362
REWARD:  -2.2424530201753403
--------
observation:  [-1.06285709  2.          0.         -1.         -1.         -1.2
 -1.09985704 -1.2         1.86668932  2.63758457  1.        ]
timesteps:  173
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4232126593205343
fy_average:  1.2566595702051182
target_force_x:  2.0
target_force_y:  0.0
forc

fx_average:  1.0716560675017803
fy_average:  0.11086546342915216
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9283439324982197
force_error_y:  0.11086546342915216
sobolev_term:  0.08598627672613991
REWARD:  -1.1251956726535117
--------
observation:  [-1.3937718   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.78146827  2.80487251  3.        ]
timesteps:  187
action_pos:  -1.2
action_vel:  1.932420015335083
fx_average:  0.9423454844173594
fy_average:  0.050683630882496944
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0576545155826405
force_error_y:  0.050683630882496944
sobolev_term:  0.11152677337259208
REWARD:  -1.2198649198377294
--------
observation:  [-1.48617903  2.          0.         -1.         -0.06757998 -1.2
 -1.2        -1.2         2.80487251  3.          1.93242002]
timesteps:  188
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|  

fx_average:  2.07120276708494
fy_average:  0.08128006752684983
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.07120276708494
force_error_y:  0.08128006752684983
sobolev_term:  0.31019924673210514
REWARD:  -0.462682081343895
--------
observation:  [-0.55105227  2.          0.         -0.35299349  0.48770005 -1.2
 -1.2        -0.42359219  3.          3.          2.48770005]
timesteps:  200
action_pos:  -0.3157364845275879
action_vel:  3.0
fx_average:  2.9020056750007397
fy_average:  -0.5360760470022702
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.9020056750007397
force_error_y:  -0.5360760470022702
sobolev_term:  0.31921307779817876
REWARD:  -1.7572947998011885
--------
observation:  [-0.50472913  2.          0.         -0.26311374  1.         -1.2
 -0.42359219 -0.31573648  3.          2.48770005  3.        ]
timesteps:  201
action_pos:  -1.1433746337890625
action_vel:  2.6610620617866516
fx_average:  1.8261505623261072
fy_average:  -0.1519167047029386
target_

fx_average:  1.3251834014074486
fy_average:  0.8990517513290868
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6748165985925514
force_error_y:  0.8990517513290868
sobolev_term:  0.44269758745676513
REWARD:  -2.0165659373784033
--------
observation:  [-1.04570195  2.          0.         -1.          0.40069857 -0.78100941
 -1.2        -1.2         2.43617222  2.05731559  2.40069857]
timesteps:  215
action_pos:  -1.1216118335723877
action_vel:  1.3869380354881287
fx_average:  0.9786892265940954
fy_average:  0.8745578863800836
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0213107734059046
force_error_y:  0.8745578863800836
sobolev_term:  0.37732452574266073
REWARD:  -2.273193185528649
--------
observation:  [-1.17493329  2.          0.         -0.93467653 -0.61306196 -1.2
 -1.2        -1.12161183  2.05731559  2.40069857  1.38693804]
timesteps:  216
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.077822634734
fy_average:  0.23916126682033825
target_force_x:  

fx_average:  2.703269795223389
fy_average:  -0.5792349874753258
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7032697952233891
force_error_y:  -0.5792349874753258
sobolev_term:  0.294407171911606
REWARD:  -1.5769119546103207
--------
observation:  [-0.14306582  2.          0.         -0.04029417  0.08109844 -1.2
 -0.35965719 -0.048353    2.03729987  2.83782846  2.08109844]
timesteps:  228
action_pos:  -1.2
action_vel:  1.78739333152771
fx_average:  1.8785708955657197
fy_average:  0.20913151115942674
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.12142910443428034
force_error_y:  0.20913151115942674
sobolev_term:  0.40556774525300215
REWARD:  -0.7361283608467093
--------
observation:  [-0.52916078  2.          0.         -1.         -0.21260667 -0.35965719
 -0.048353   -1.2         2.83782846  2.08109844  1.78739333]
timesteps:  229
action_pos:  -0.9877345561981201
action_vel:  3.0
fx_average:  1.7636961804360896
fy_average:  0.7385979950296381
target_force_x

fx_average:  3.5039111174593334
fy_average:  -0.5435416255596749
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.5039111174593334
force_error_y:  -0.5435416255596749
sobolev_term:  0.35767271241457843
REWARD:  -2.405125455433587
--------
observation:  [-0.30263881  2.          0.         -1.         -1.         -0.27025337
  0.75798726 -1.2         3.          3.          1.        ]
timesteps:  243
action_pos:  -1.2
action_vel:  2.1442927718162537
fx_average:  2.7137713825616165
fy_average:  0.13870345291864472
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.7137713825616165
force_error_y:  0.13870345291864472
sobolev_term:  0.423922352912336
REWARD:  -1.2763971883925973
--------
observation:  [-0.82117941  2.          0.         -1.          0.14429277  0.75798726
 -1.2        -1.2         3.          1.          2.14429277]
timesteps:  244
action_pos:  -0.9238609313964843
action_vel:  3.0
fx_average:  2.426926232184521
fy_average:  0.05558864897702998
target_

fx_average:  2.3347940052760614
fy_average:  0.5982669430058691
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3347940052760614
force_error_y:  0.5982669430058691
sobolev_term:  0.6107397036242331
REWARD:  -1.5438006519061638
--------
observation:  [-0.42528316  2.          0.         -1.          1.          0.07982898
 -0.71437182 -1.2         3.          2.67472857  3.        ]
timesteps:  6
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.654752779911661
fy_average:  0.7874516022254658
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6547527799116608
force_error_y:  0.7874516022254658
sobolev_term:  0.47932887579700273
REWARD:  -1.9215332579341293
--------
observation:  [-1.00393666  2.          0.         -1.          1.         -0.71437182
 -1.2        -1.2         2.67472857  3.          3.        ]
timesteps:  7
action_pos:  -1.0734302043914794
action_vel:  2.691528081893921
fx_average:  1.957121809409186
fy_average:  0.6474493260060759
target_force_x: 

fx_average:  0.6258923726526433
fy_average:  0.04584539279423465
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3741076273473567
force_error_y:  0.04584539279423465
sobolev_term:  0.06039312940414084
REWARD:  -1.4803461495457322
--------
observation:  [-1.3683904   2.          0.         -1.          0.62695283 -1.2
 -1.2        -1.2         3.          3.          2.62695283]
timesteps:  21
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9318737666393808
fy_average:  0.03713830332147121
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0681262333606192
force_error_y:  0.03713830332147121
sobolev_term:  0.08402730943320913
REWARD:  -1.1892918461152995
--------
observation:  [-1.26147896  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.62695283  3.        ]
timesteps:  22
action_pos:  -1.2
action_vel:  2.0450326204299927
fx_average:  1.234682915851195
fy_average:  0.08219818195077142
target_force_x:  2.0
target_force

action_pos:  -0.42094717025756834
action_vel:  2.7795885801315308
fx_average:  1.5485020163027015
fy_average:  -0.26673007422960104
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.45149798369729854
force_error_y:  -0.26673007422960104
sobolev_term:  0.5358830859391768
REWARD:  -1.2541111438660764
--------
observation:  [-0.26304056  2.          0.         -0.35078931  0.77958858  1.2
 -0.15084314 -0.42094717  3.          3.          2.77958858]
timesteps:  35
action_pos:  0.12523212432861328
action_vel:  3.0
fx_average:  1.6104509968781753
fy_average:  -0.9100408335213257
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3895490031218247
force_error_y:  -0.9100408335213257
sobolev_term:  0.5205847958340267
REWARD:  -1.820174632477177
--------
observation:  [ 0.09709236  2.          0.          0.1043601   1.         -0.15084314
 -0.42094717  0.12523212  3.          2.77958858  3.        ]
timesteps:  36
action_pos:  -0.9061293125152587
action_vel:  3.0
fx_averag

fx_average:  3.44223165222365
fy_average:  1.4352414235555495
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4422316522236498
force_error_y:  1.4352414235555495
sobolev_term:  0.49661189545093676
REWARD:  -3.3740849712301357
--------
observation:  [-1.28849457  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.01013148  3.        ]
timesteps:  50
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.401025612381581
fy_average:  1.4656605604461468
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4010256123815812
force_error_y:  1.4656605604461468
sobolev_term:  0.45302298312513856
REWARD:  -2.3197091559528666
--------
observation:  [-1.46385262  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.01013148  3.          3.        ]
timesteps:  51
action_pos:  -1.2
action_vel:  1.0
fx_average:  1.8393055553039033
fy_average:  2.3872618820258267
target_force_x:  2.0
target_force_y:  0.0
force_error_x: 

fx_average:  1.258851504990545
fy_average:  -0.0038967013404272834
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7411484950094549
force_error_y:  -0.0038967013404272834
sobolev_term:  0.13758981214428362
REWARD:  -0.8826350084941659
--------
observation:  [-1.18315701  2.          0.         -1.          0.57585543 -1.2
 -1.07444959 -1.2         2.90038109  3.          2.57585543]
timesteps:  65
action_pos:  -0.4155262470245361
action_vel:  3.0
fx_average:  2.281656837403467
fy_average:  -0.26946696662005093
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2816568374034669
force_error_y:  -0.26946696662005093
sobolev_term:  0.20167789615683057
REWARD:  -0.7528017001803484
--------
observation:  [-0.76364629  2.          0.         -0.34627187  1.         -1.07444959
 -1.2        -0.41552625  3.          2.57585543  3.        ]
timesteps:  66
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250   

fx_average:  2.833738815938187
fy_average:  0.4895779925375784
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.8337388159381871
force_error_y:  0.4895779925375784
sobolev_term:  0.6483176758790032
REWARD:  -1.9716344843547688
--------
observation:  [ 0.29079624  2.          0.         -1.          0.61498827 -1.2
 -0.07891231 -1.2         2.63454521  3.          2.61498827]
timesteps:  78
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.637225816301925
fy_average:  2.9187627740344815
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6372258163019251
force_error_y:  2.9187627740344815
sobolev_term:  0.6885206167290803
REWARD:  -4.244509207065487
--------
observation:  [-0.90045989  2.          0.         -1.          1.         -0.07891231
 -1.2        -1.2         3.          2.61498827  3.        ]
timesteps:  79
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.603082061232689
fy_average:  1.990202429617357
target_force_x:  2.0
target_force_y:  0.0
force_error_

fx_average:  1.1038033627480013
fy_average:  0.2837148148055342
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8961966372519987
force_error_y:  0.2837148148055342
sobolev_term:  0.44793920275628696
REWARD:  -1.6278506548138199
--------
observation:  [-1.25245643  2.          0.         -1.          1.         -1.13979449
 -0.95335572 -1.2         2.14259803  2.70159173  3.        ]
timesteps:  93
action_pos:  -1.2
action_vel:  2.5779845118522644
fx_average:  0.09229593040191864
fy_average:  0.7319921687268734
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.9077040695980814
force_error_y:  0.7319921687268734
sobolev_term:  0.38050952220418605
REWARD:  -3.020205760529141
--------
observation:  [-1.12120149  2.          0.         -1.          0.57798451 -0.95335572
 -1.2        -1.2         2.70159173  3.          2.57798451]
timesteps:  94
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.42398235427277303
fy_average:  0.2947612076728717
target_force_x:  2.0
t

fx_average:  0.7403610427905175
fy_average:  1.0041520248375657
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2596389572094826
force_error_y:  1.0041520248375657
sobolev_term:  0.40749433251361056
REWARD:  -2.671285314560659
--------
observation:  [-1.36966236  2.          0.         -1.          0.62559628 -1.2
 -1.2        -1.2         3.          3.          2.62559628]
timesteps:  106
action_pos:  -1.2
action_vel:  2.1837422847747803
fx_average:  1.4823036869508435
fy_average:  0.637795882592078
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5176963130491565
force_error_y:  0.637795882592078
sobolev_term:  0.3130363517831845
REWARD:  -1.468528547424419
--------
observation:  [-1.44688502  2.          0.         -1.          0.18374228 -1.2
 -1.2        -1.2         3.          2.62559628  2.18374228]
timesteps:  107
action_pos:  0.4587168216705322
action_vel:  3.0
fx_average:  3.173456228090251
fy_average:  -0.768287489655213
target_force_x:  2.0
target

fx_average:  2.4747540613536523
fy_average:  -0.5015377968836534
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.4747540613536523
force_error_y:  -0.5015377968836534
sobolev_term:  0.3481517309483187
REWARD:  -1.3244435891856243
--------
observation:  [-1.41966483  2.          0.         -1.         -0.71246707 -1.2
 -0.96298707 -1.2         2.78811407  3.          1.28753293]
timesteps:  121
action_pos:  -0.060813188552856445
action_vel:  3.0
fx_average:  2.6297052876652183
fy_average:  -0.456745000766792
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6297052876652183
force_error_y:  -0.456745000766792
sobolev_term:  0.3596782148404982
REWARD:  -1.4461285032725084
--------
observation:  [-0.64894813  2.          0.         -0.05067766  1.         -0.96298707
 -1.2        -0.06081319  3.          1.28753293  3.        ]
timesteps:  122
action_pos:  -0.42128806114196776
action_vel:  3.0
fx_average:  3.308202047637677
fy_average:  -0.1678575295970808
target_force

fx_average:  1.9246108470449759
fy_average:  0.24040779692727057
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.07538915295502413
force_error_y:  0.24040779692727057
sobolev_term:  0.2875956594993422
REWARD:  -0.6033926093816369
--------
observation:  [-1.1448545   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.96753377  3.        ]
timesteps:  134
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6291670718590057
fy_average:  0.25730541177980504
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.37083292814099433
force_error_y:  0.25730541177980504
sobolev_term:  0.2675635393786554
REWARD:  -0.8957018792994548
--------
observation:  [-1.22540374  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.96753377  3.          3.        ]
timesteps:  135
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.5704984794332787
fy_average:  0.19115987934052622
target_force_x:  2.0
target_force_y:  0.0
for

fx_average:  2.573339735562155
fy_average:  -0.448986823597485
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.573339735562155
force_error_y:  -0.448986823597485
sobolev_term:  0.5071195845681763
REWARD:  -1.5294461437278164
--------
observation:  [-0.99584168  2.          0.         -1.          1.         -1.2
 -1.00730968 -1.2         2.44862723  2.88348079  3.        ]
timesteps:  149
action_pos:  -1.066467833518982
action_vel:  3.0
fx_average:  1.6671727730782706
fy_average:  -0.42412775806818487
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3328272269217294
force_error_y:  -0.42412775806818487
sobolev_term:  0.4064152983234974
REWARD:  -1.1633702833134116
--------
observation:  [-1.06617353  2.          0.         -0.88872319  1.         -1.00730968
 -1.2        -1.06646783  2.88348079  3.          3.        ]
timesteps:  150
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.798500867411212
fy_average:  0.9360689794155541
target_force_x:  2.0
target_for

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.7690146960972699
fy_average:  0.5620872446845939
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.23098530390273
force_error_y:  0.5620872446845939
sobolev_term:  0.3201716652241437
REWARD:  -2.1132442138114675
--------
observation:  [-1.14407062  2.          0.         -1.          1.         -0.97724297
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  163
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.5096799050547177
fy_average:  -0.14117991117710782
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4903200949452824
force_error_y:  -0.14117991117710782
sobolev_term:  0.11545355355604026
REWARD:  -1.7469535596784302
--------
observation:  [-1.15952891  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  164
action_pos:  -0.575507640838623
action_vel:  2.9212249517440796
fx_average:  1.4937120695775992
fy_average

fx_average:  2.626538661709671
fy_average:  0.6149444974434033
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6265386617096711
force_error_y:  0.6149444974434033
sobolev_term:  0.38593003538802795
REWARD:  -1.6274131945411023
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  178
action_pos:  0.019915723800659178
action_vel:  3.0
fx_average:  3.4107061695070295
fy_average:  0.8518885238268619
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4107061695070295
force_error_y:  0.8518885238268619
sobolev_term:  0.3889929198169759
REWARD:  -2.6515876131508676
--------
observation:  [-0.56895076  2.          0.          0.01659644  1.         -1.2
 -1.2         0.01991572  3.          3.          3.        ]
timesteps:  179
action_pos:  -1.2
action_vel:  3.0
fx_average:  3.257130300674227
fy_average:  0.9188565434616699
target_force_x:  2.0
target_force_y:  0.0
force_e

fx_average:  1.054219464533751
fy_average:  0.2890348952848431
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.945780535466249
force_error_y:  0.2890348952848431
sobolev_term:  0.24888754652713774
REWARD:  -1.48370297727823
--------
observation:  [-1.41557243  2.          0.         -0.9350009   0.27841508 -1.2
 -1.16654735 -1.12200108  3.          3.          2.27841508]
timesteps:  193
action_pos:  0.14692883491516112
action_vel:  3.0
fx_average:  2.0776844616590933
fy_average:  -0.06468010913062529
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.07768446165909326
force_error_y:  -0.06468010913062529
sobolev_term:  0.3141661242121299
REWARD:  -0.45653069500184845
--------
observation:  [-0.65782576  2.          0.          0.1224407   1.         -1.16654735
 -1.12200108  0.14692883  3.          2.27841508  3.        ]
timesteps:  194
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|

fx_average:  1.578578466773243
fy_average:  1.8776017057989578
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.421421533226757
force_error_y:  1.8776017057989578
sobolev_term:  0.5528955010687501
REWARD:  -2.8519187400944648
--------
observation:  [-1.31862584  2.          0.         -0.90041709 -0.02155721 -1.2
 -1.2        -1.08050051  2.97302783  3.          1.97844279]
timesteps:  206
action_pos:  -0.7989783525466919
action_vel:  3.0
fx_average:  2.2442523359321904
fy_average:  0.6519725479272468
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.24425233593219042
force_error_y:  0.6519725479272468
sobolev_term:  0.5360541693664661
REWARD:  -1.4322790532259033
--------
observation:  [-0.83572414  2.          0.         -0.66581529  1.         -1.2
 -1.08050051 -0.79897835  3.          1.97844279  3.        ]
timesteps:  207
action_pos:  -0.9563060045242309
action_vel:  2.8852578997612
fx_average:  1.5952248214385298
fy_average:  1.0172435169464735
target_force

fx_average:  0.6250533234762906
fy_average:  0.7233023425722124
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3749466765237095
force_error_y:  0.7233023425722124
sobolev_term:  0.30094586385593725
REWARD:  -2.399194882951859
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  221
action_pos:  0.4646111011505127
action_vel:  3.0
fx_average:  2.011944351553008
fy_average:  -0.6296054483828408
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.011944351553008214
force_error_y:  -0.6296054483828408
sobolev_term:  0.24954955411893243
REWARD:  -0.8910993540547815
--------
observation:  [-0.4560419   2.          0.          0.38717592  1.         -1.2
 -1.2         0.4646111   3.          3.          3.        ]
timesteps:  222
action_pos:  0.521698522567749
action_vel:  3.0
fx_average:  2.0809983744858567
fy_average:  -1.4353328880208107
target_force_x:  2.0
target_for

fx_average:  1.2532793071942845
fy_average:  0.39183502859757174
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7467206928057155
force_error_y:  0.39183502859757174
sobolev_term:  0.32677595704798434
REWARD:  -1.4653316784512715
--------
observation:  [-1.11372339  2.          0.         -1.          1.         -0.69885499
 -1.2        -1.2         2.72669917  3.          3.        ]
timesteps:  234
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.7825497774885907
fy_average:  0.3803387616131699
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2174502225114092
force_error_y:  0.3803387616131699
sobolev_term:  0.2931409640592993
REWARD:  -1.8909299481838784
--------
observation:  [-1.13824737  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  235
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4925335311658127
fy_average:  0.31647275992382357
target_force_x:  2.0
target_force_y:  0.0

fx_average:  1.3493829253850715
fy_average:  0.2387529567112212
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6506170746149285
force_error_y:  0.2387529567112212
sobolev_term:  0.18035923469820633
REWARD:  -1.069729266024356
--------
observation:  [-1.26877505  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  249
action_pos:  -1.2
action_vel:  2.75595760345459
fx_average:  1.320747667271368
fy_average:  0.2667180615412643
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.679252332728632
force_error_y:  0.2667180615412643
sobolev_term:  0.18389307894739992
REWARD:  -1.1298634732172963
--------
observation:  [-1.31143458  2.          0.         -1.          0.7559576  -1.2
 -1.2        -1.2         3.          3.          2.7559576 ]
timesteps:  250
-----------RESET!!!!!!!!!!!!!-----------
action_pos:  0.020442438125610352
action_vel:  2.8918580412864685
fx_average:  0.9125923736771692


fx_average:  2.1723879424725325
fy_average:  0.20885904933404606
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.17238794247253253
force_error_y:  0.20885904933404606
sobolev_term:  0.26225273567557134
REWARD:  -0.6434997274821499
--------
observation:  [-1.30936477  2.          0.         -1.          1.         -1.2
 -1.03722889 -1.2         1.30765271  2.81817275  3.        ]
timesteps:  12
action_pos:  -0.038278055191040036
action_vel:  3.0
fx_average:  3.240745168890395
fy_average:  -0.1517788157314475
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.240745168890395
force_error_y:  -0.1517788157314475
sobolev_term:  0.3076909555901631
REWARD:  -1.7002149402120055
--------
observation:  [-0.54359011  2.          0.         -0.03189838  1.         -1.03722889
 -1.2        -0.03827806  2.81817275  3.          3.        ]
timesteps:  13
action_pos:  -1.2
action_vel:  3.0
fx_average:  3.2275024819472518
fy_average:  -0.06307962473363321
target_force_x:  2.0
targe

fx_average:  1.70880511236873
fy_average:  0.15654113459352226
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.29119488763127
force_error_y:  0.15654113459352226
sobolev_term:  0.18699134884677912
REWARD:  -0.6347273710715714
--------
observation:  [-1.58317387  2.          0.         -1.         -0.28985596 -1.2
 -0.95515594 -1.2         2.94456053  3.          1.71014404]
timesteps:  27
action_pos:  -1.2
action_vel:  2.20555579662323
fx_average:  2.2580395748978694
fy_average:  0.1901103056120468
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.25803957489786944
force_error_y:  0.1901103056120468
sobolev_term:  0.19047743083147667
REWARD:  -0.638627311341393
--------
observation:  [-1.44779637  2.          0.         -1.          0.2055558  -0.95515594
 -1.2        -1.2         3.          1.71014404  2.2055558 ]
timesteps:  28
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4834757991304826
fy_average:  0.2209419915403692
target_force_x:  2.0
target_force_y

action_pos:  -1.2
action_vel:  2.0534629821777344
fx_average:  1.133775402047097
fy_average:  0.005304827268751631
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8662245979529031
force_error_y:  0.005304827268751631
sobolev_term:  0.1545048207871908
REWARD:  -1.0260342460088454
--------
observation:  [-1.39172756  2.          0.         -1.          0.05346298 -1.2
 -0.83238709 -1.2         2.97212225  2.42021447  2.05346298]
timesteps:  41
action_pos:  -0.8230307579040527
action_vel:  1.1879816055297852
fx_average:  2.3514021631097557
fy_average:  -0.0259627705710772
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.3514021631097557
force_error_y:  -0.0259627705710772
sobolev_term:  0.22816671642348796
REWARD:  -0.6055316501043209
--------
observation:  [-0.8414586   2.          0.         -0.68585896 -0.81201839 -0.83238709
 -1.2        -0.82303076  2.42021447  2.05346298  1.18798161]
timesteps:  42
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.02378684193

fx_average:  1.2259726318027577
fy_average:  1.003903923722258
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7740273681972423
force_error_y:  1.003903923722258
sobolev_term:  0.3663331612369154
REWARD:  -2.1442644531564157
--------
observation:  [-0.67821648  2.          0.         -1.          1.         -1.04520779
 -1.2        -1.2         1.86193502  1.          3.        ]
timesteps:  56
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0264485576900944
fy_average:  1.2860427954716533
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9735514423099056
force_error_y:  1.2860427954716533
sobolev_term:  0.3598354577795679
REWARD:  -2.6194296955611267
--------
observation:  [-1.26024794  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.          3.          3.        ]
timesteps:  57
action_pos:  -1.2
action_vel:  2.861275792121887
fx_average:  1.5643415481924778
fy_average:  1.2368981877024956
target_force_x:  2.0
target_force_y

fx_average:  3.4279067333230726
fy_average:  -0.9202330335076495
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.4279067333230726
force_error_y:  -0.9202330335076495
sobolev_term:  0.406682270459968
REWARD:  -2.7548220372906904
--------
observation:  [ 0.38260052  2.          0.          0.55577874  1.         -1.2
  0.19185319  0.66693449  2.6963864   1.44418192  3.        ]
timesteps:  71
action_pos:  -0.27732396125793457
action_vel:  3.0
fx_average:  3.380274267788218
fy_average:  -1.1877552016196886
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.3802742677882178
force_error_y:  -1.1877552016196886
sobolev_term:  0.4457938451182458
REWARD:  -3.013823314526152
--------
observation:  [-0.27707531  2.          0.         -0.2311033   1.          0.19185319
  0.66693449 -0.27732396  1.44418192  3.          3.        ]
timesteps:  72
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
| 

fx_average:  2.6777598121751804
fy_average:  1.3419105840945331
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.6777598121751804
force_error_y:  1.3419105840945331
sobolev_term:  0.6621210454554449
REWARD:  -2.6817914417251583
--------
observation:  [-0.94790606  2.          0.         -1.          1.         -0.16953006
 -1.2        -1.2         3.          2.52387416  3.        ]
timesteps:  84
action_pos:  -1.2
action_vel:  2.348314642906189
fx_average:  1.3029201629697515
fy_average:  1.760145608117178
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6970798370302485
force_error_y:  1.760145608117178
sobolev_term:  0.6265980413670644
REWARD:  -3.083823486514491
--------
observation:  [-1.03782073  2.          0.         -1.          0.34831464 -1.2
 -1.2        -1.2         2.52387416  3.          2.34831464]
timesteps:  85
action_pos:  -1.2
action_vel:  2.2222812175750732
fx_average:  1.570953171872416
fy_average:  1.8214053214323822
target_force_x:  2.0
ta

fx_average:  1.2608759997283265
fy_average:  1.246145909379713
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7391240002716735
force_error_y:  1.246145909379713
sobolev_term:  0.43636567748459243
REWARD:  -2.4216355871359787
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  99
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0252924525312044
fy_average:  1.2976071403997578
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9747075474687956
force_error_y:  1.2976071403997578
sobolev_term:  0.318713447690416
REWARD:  -2.5910281355589695
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  100
action_pos:  1.2
action_vel:  2.661804974079132
fx_average:  2.023883138196139
fy_average:  -0.8004741697451414
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.02

fx_average:  1.0415296777722898
fy_average:  0.13420036028146703
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9584703222277102
force_error_y:  0.13420036028146703
sobolev_term:  0.18183514091948635
REWARD:  -1.2745058234286635
--------
observation:  [-1.65543419  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         2.98590612  3.          1.        ]
timesteps:  112
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6587891603649823
fy_average:  0.1083935678482152
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.34121083963501775
force_error_y:  0.1083935678482152
sobolev_term:  0.22354911382783035
REWARD:  -0.6731535213110633
--------
observation:  [-1.38528526  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.          3.        ]
timesteps:  113
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4237393627142354
fy_average:  0.5519656960092518
target_force_x:  2.0
target_force_y:  0.0
force

fx_average:  1.6372341090092577
fy_average:  0.02722341179394572
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3627658909907423
force_error_y:  0.02722341179394572
sobolev_term:  0.3208797267668981
REWARD:  -0.710869029551586
--------
observation:  [-1.26992116  2.          0.         -1.          1.         -0.43167272
 -1.2        -1.2         1.99575901  3.          3.        ]
timesteps:  127
action_pos:  -1.1359700202941894
action_vel:  3.0
fx_average:  1.6710840562222211
fy_average:  -0.05206805860364052
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3289159437777789
force_error_y:  -0.05206805860364052
sobolev_term:  0.31785060698030543
REWARD:  -0.6988346093617248
--------
observation:  [-1.06231678  2.          0.         -0.94664168  1.         -1.2
 -1.2        -1.13597002  3.          3.          3.        ]
timesteps:  128
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5580308133344942
fy_average:  -0.09537574420694156
target_force_x:  2.0
t

fx_average:  1.703271926028431
fy_average:  0.04908756547479746
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.296728073971569
force_error_y:  0.04908756547479746
sobolev_term:  0.23110860627843013
REWARD:  -0.5769242457247966
--------
observation:  [-0.99458679  2.          0.         -1.          0.80643845 -1.2
 -1.02780926 -1.2         2.69501388  1.59267128  2.80643845]
timesteps:  140
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4421012710467644
fy_average:  0.13491697179465856
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5578987289532356
force_error_y:  0.13491697179465856
sobolev_term:  0.2536961729468958
REWARD:  -0.94651187369479
--------
observation:  [-1.40877602  2.          0.         -1.          1.         -1.02780926
 -1.2        -1.2         1.59267128  2.80643845  3.        ]
timesteps:  141
action_pos:  -1.2
action_vel:  1.2809841632843018
fx_average:  1.7798777675289337
fy_average:  0.07752998867119547
target_force_x:  2.0
target_

fx_average:  2.3489481524507076
fy_average:  -0.219641999555464
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.34894815245070765
force_error_y:  -0.219641999555464
sobolev_term:  0.1597681011038376
REWARD:  -0.7283582531100092
--------
observation:  [-0.9757595   2.          0.         -0.61589539  0.73251885 -1.2
 -1.2        -0.73907447  2.84725916  3.          2.73251885]
timesteps:  155
action_pos:  -1.2
action_vel:  1.3001247644424438
fx_average:  2.1172776986739152
fy_average:  0.042039682689676346
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.11727769867391524
force_error_y:  0.042039682689676346
sobolev_term:  0.18887539394385175
REWARD:  -0.3481927753074433
--------
observation:  [-1.47284746  2.          0.         -1.         -0.69987524 -1.2
 -0.73907447 -1.2         3.          2.73251885  1.30012476]
timesteps:  156
action_pos:  -0.6932517528533936
action_vel:  2.978110373020172
fx_average:  3.3401519009130873
fy_average:  -0.0006252537936172138

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.5265804956308978
fy_average:  1.456488726844812
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.4734195043691023
force_error_y:  1.456488726844812
sobolev_term:  0.508689615226671
REWARD:  -3.438597846440585
--------
observation:  [-0.93175385  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.65200889  1.          3.        ]
timesteps:  169
action_pos:  -1.2
action_vel:  1.9884876608848572
fx_average:  0.884004409033861
fy_average:  1.394190858649521
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.115995590966139
force_error_y:  1.394190858649521
sobolev_term:  0.44973641944428167
REWARD:  -2.959922869059942
--------
observation:  [-1.17360924  2.          0.         -1.         -0.01151234 -1.2
 -1.2        -1.2         1.          3.          1.98848766]
timesteps:  170
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5274879904118746
fy_average:  0.7095776820722046
target_for

fx_average:  1.743142333763345
fy_average:  0.3795298164816019
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.25685766623665507
force_error_y:  0.3795298164816019
sobolev_term:  0.3295804158397423
REWARD:  -0.9659678985579992
--------
observation:  [-1.57153228  2.          0.         -1.         -0.7898218  -1.2
 -1.2        -1.2         3.          2.24856025  1.2101782 ]
timesteps:  184
action_pos:  -1.2
action_vel:  2.168281316757202
fx_average:  1.7684854371798369
fy_average:  0.41047710136921123
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.23151456282016314
force_error_y:  0.41047710136921123
sobolev_term:  0.30529917614012897
REWARD:  -0.9472908403295033
--------
observation:  [-1.10143402  2.          0.         -1.          0.16828132 -1.2
 -1.2        -1.2         2.24856025  1.2101782   2.16828132]
timesteps:  185
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.082639929873814
fy_average:  0.30502651229700006
target_force_x:  2.0
target_force_

fx_average:  0.8534281447364439
fy_average:  0.6563152965251023
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1465718552635562
force_error_y:  0.6563152965251023
sobolev_term:  0.29508176718595097
REWARD:  -2.0979689189746096
--------
observation:  [-1.44199859  2.          0.         -1.          0.21193224 -1.2
 -1.2        -1.2         3.          2.67778945  2.21193224]
timesteps:  199
action_pos:  -1.2
action_vel:  2.6650291681289673
fx_average:  1.3957528763229736
fy_average:  0.4206139702077715
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6042471236770264
force_error_y:  0.4206139702077715
sobolev_term:  0.21740875945894697
REWARD:  -1.2422698533437448
--------
observation:  [-1.1806109   2.          0.         -1.          0.66502917 -1.2
 -1.2        -1.2         2.67778945  2.21193224  2.66502917]
timesteps:  200
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_

fx_average:  0.9672595941076937
fy_average:  0.2463119417485792
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0327404058923064
force_error_y:  0.2463119417485792
sobolev_term:  0.18883003600882758
REWARD:  -1.4678823836497132
--------
observation:  [-1.43003667  2.          0.         -1.          0.31650245 -1.2
 -1.2        -1.2         3.          3.          2.31650245]
timesteps:  212
action_pos:  -1.2
action_vel:  2.6546740531921387
fx_average:  1.3768285743016755
fy_average:  0.24142009059158226
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6231714256983245
force_error_y:  0.24142009059158226
sobolev_term:  0.1879587682155398
REWARD:  -1.0525502845054464
--------
observation:  [-1.39321899  2.          0.         -1.          0.65467405 -1.2
 -1.2        -1.2         3.          2.31650245  2.65467405]
timesteps:  213
action_pos:  -1.2
action_vel:  2.6853162050247192
fx_average:  1.1538906152719668
fy_average:  0.22176549157724718
target_force_x:  2

fx_average:  2.0388226180769
fy_average:  0.15673323683602894
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.03882261807690002
force_error_y:  0.15673323683602894
sobolev_term:  0.23524080884769133
REWARD:  -0.4307966637606203
--------
observation:  [-1.3150278   2.          0.         -1.          0.62542224 -1.2
 -1.2        -1.2         2.28263968  3.          2.62542224]
timesteps:  227
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8260523687577628
fy_average:  0.2241397642572079
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.17394763124223722
force_error_y:  0.2241397642572079
sobolev_term:  0.23744254160022782
REWARD:  -0.635529937099673
--------
observation:  [-1.17059755  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.62542224  3.        ]
timesteps:  228
action_pos:  -1.2
action_vel:  2.8220779299736023
fx_average:  1.238247043686511
fy_average:  0.2575875443644356
target_force_x:  2.0
target_force_y: 

fx_average:  1.4057063328615167
fy_average:  0.6623316191526972
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5942936671384833
force_error_y:  0.6623316191526972
sobolev_term:  0.3773129066782895
REWARD:  -1.63393819296947
--------
observation:  [-1.23030405  2.          0.         -1.          0.64497781 -1.2
 -1.2        -1.2         3.          1.          2.64497781]
timesteps:  240
action_pos:  -1.2
action_vel:  1.5093063712120056
fx_average:  0.6694723987429141
fy_average:  1.0886135026366692
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.330527601257086
force_error_y:  1.0886135026366692
sobolev_term:  0.43171059842017034
REWARD:  -2.8508517023139257
--------
observation:  [-1.36918687  2.          0.         -1.         -0.49069363 -1.2
 -1.2        -1.2         1.          2.64497781  1.50930637]
timesteps:  241
action_pos:  -1.2
action_vel:  1.6787277460098267
fx_average:  0.8016119076870261
fy_average:  0.8950095540233548
target_force_x:  2.0
tar

action_pos:  -1.2
action_vel:  2.1268874406814575
fx_average:  0.7368594739292907
fy_average:  0.1278810480399829
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2631405260707091
force_error_y:  0.1278810480399829
sobolev_term:  0.5107746672827613
REWARD:  -1.9017962413934535
--------
observation:  [-1.16433915  2.          0.         -1.          0.12688744 -1.14967289
 -1.2        -1.2         2.58998001  2.25874272  2.12688744]
timesteps:  5
action_pos:  -1.2
action_vel:  2.826972246170044
fx_average:  1.1203968817121333
fy_average:  -0.3088773962659107
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8796031182878667
force_error_y:  -0.3088773962659107
sobolev_term:  0.4430448745236459
REWARD:  -1.6315253890774235
--------
observation:  [-1.24683021  2.          0.         -1.          0.82697225 -1.2
 -1.2        -1.2         2.25874272  2.12688744  2.82697225]
timesteps:  6
action_pos:  -1.2
action_vel:  1.0
fx_average:  0.7198897551645469
fy_average:  0.

fx_average:  1.3164592617821123
fy_average:  0.32950326397847135
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6835407382178877
force_error_y:  0.32950326397847135
sobolev_term:  0.23523606145717021
REWARD:  -1.2482800636535294
--------
observation:  [-1.42493526  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.53526491  3.        ]
timesteps:  18
action_pos:  -1.1752665996551512
action_vel:  3.0
fx_average:  1.1116554609099816
fy_average:  0.258652378235905
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8883445390900184
force_error_y:  0.258652378235905
sobolev_term:  0.21579273017817088
REWARD:  -1.3627896475040941
--------
observation:  [-1.38661417  2.          0.         -0.97938883  1.         -1.2
 -1.2        -1.1752666   1.53526491  3.          3.        ]
timesteps:  19
action_pos:  -1.2
action_vel:  1.885981798171997
fx_average:  0.6374364494191899
fy_average:  0.22477951986833972
target_force_x:  2.0
t

fx_average:  1.616800959032766
fy_average:  0.26710305379177135
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3831990409672339
force_error_y:  0.26710305379177135
sobolev_term:  0.31126211027854983
REWARD:  -0.961564205037555
--------
observation:  [-0.60176488  2.          0.         -1.          0.30151293 -1.2
 -0.44911094 -1.2         2.98931396  2.41484115  2.30151293]
timesteps:  33
action_pos:  -1.2
action_vel:  1.4833911657333374
fx_average:  0.9364543019979579
fy_average:  0.6975704103684367
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0635456980020421
force_error_y:  0.6975704103684367
sobolev_term:  0.3742515386603562
REWARD:  -2.1353676470308347
--------
observation:  [-1.20388202  2.          0.         -1.         -0.51660883 -0.44911094
 -1.2        -1.2         2.41484115  2.30151293  1.48339117]
timesteps:  34
action_pos:  -1.2
action_vel:  2.9909666180610657
fx_average:  1.3450016039793755
fy_average:  0.5841769618851084
target_force_x: 

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.7196295209724984
fy_average:  -0.047772808513783994
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2803704790275017
force_error_y:  -0.047772808513783994
sobolev_term:  0.038203854095571727
REWARD:  -1.3663471416368576
--------
observation:  [-1.22525636  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  47
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.6180717558931498
fy_average:  0.01410908198825221
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3819282441068501
force_error_y:  0.01410908198825221
sobolev_term:  0.034414859714398685
REWARD:  -1.430452185809501
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  48
action_pos:  0.1494389533996582
action_vel:  1.5423967242240906
fx_average:  1.7161178839405677
fy_average:  -0.27046

fx_average:  1.764279695176377
fy_average:  0.3754966972353674
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.23572030482362294
force_error_y:  0.3754966972353674
sobolev_term:  0.23199901916852234
REWARD:  -0.8432160212275126
--------
observation:  [-1.09317728  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.94908541  1.73352665  3.        ]
timesteps:  62
action_pos:  -1.2
action_vel:  1.8729833364486694
fx_average:  1.5920448690637938
fy_average:  0.3214553766707385
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.40795513093620617
force_error_y:  0.3214553766707385
sobolev_term:  0.21516478981841713
REWARD:  -0.9445752974253617
--------
observation:  [-1.36225597  2.          0.         -1.         -0.12701666 -1.2
 -1.2        -1.2         1.73352665  3.          1.87298334]
timesteps:  63
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.0256958029961967
fy_average:  0.3631121833930436
target_force_x:  2.0
target_force_y:

fx_average:  3.131590015016858
fy_average:  -0.1937801835781175
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  1.131590015016858
force_error_y:  -0.1937801835781175
sobolev_term:  0.25701934553368394
REWARD:  -1.5823895441286595
--------
observation:  [-0.51415986  2.          0.         -0.19828618  0.70130444 -1.2
 -1.2        -0.23794341  2.32346961  3.          2.70130444]
timesteps:  77
action_pos:  -1.2
action_vel:  2.4333617985248566
fx_average:  2.5631781042026556
fy_average:  0.14230252838297205
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.5631781042026556
force_error_y:  0.14230252838297205
sobolev_term:  0.3508101105729239
REWARD:  -1.0562907431585515
--------
observation:  [-0.30302566  2.          0.         -1.          0.4333618  -1.2
 -0.23794341 -1.2         3.          2.70130444  2.4333618 ]
timesteps:  78
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_

fx_average:  1.1176291710444501
fy_average:  0.3110356174921682
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8823708289555499
force_error_y:  0.3110356174921682
sobolev_term:  0.20528925318183627
REWARD:  -1.3986956996295543
--------
observation:  [-1.38162664  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.64654422  3.          3.        ]
timesteps:  90
action_pos:  -1.2
action_vel:  1.764975666999817
fx_average:  1.5236161921807851
fy_average:  0.28333082039131907
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.47638380781921486
force_error_y:  0.28333082039131907
sobolev_term:  0.22136020964686706
REWARD:  -0.981074837857401
--------
observation:  [-1.6823493   2.          0.         -1.         -0.23502433 -1.2
 -1.2        -1.2         3.          3.          1.76497567]
timesteps:  91
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.8426520391802823
fy_average:  0.3541116400201275
target_force_x:  2.0
target_force_y:

fx_average:  1.9495463249591816
fy_average:  0.26345899316649624
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.050453675040818435
force_error_y:  0.26345899316649624
sobolev_term:  0.2115991258610816
REWARD:  -0.5255117940683962
--------
observation:  [-1.36184134  2.          0.         -1.          0.35654426 -1.10535836
 -1.2        -1.2         3.          2.01799142  2.35654426]
timesteps:  105
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.9105993914245847
fy_average:  0.2490758882568374
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.08940060857541532
force_error_y:  0.2490758882568374
sobolev_term:  0.20167566106695176
REWARD:  -0.5401521578992045
--------
observation:  [-1.29226723  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.01799142  2.35654426  3.        ]
timesteps:  106
action_pos:  -1.2
action_vel:  2.4181635081768036
fx_average:  1.7420627580383674
fy_average:  0.2696124995279024
target_force_x:  2.0
tar

fx_average:  1.4974339838871285
fy_average:  0.2146099250004175
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5025660161128715
force_error_y:  0.2146099250004175
sobolev_term:  0.2735880381696807
REWARD:  -0.9907639792829698
--------
observation:  [-1.10026764  2.          0.         -1.          0.69817793 -0.68242378
 -1.2        -1.2         3.          3.          2.69817793]
timesteps:  118
action_pos:  -1.2
action_vel:  1.0
fx_average:  1.2972513730034807
fy_average:  0.24901698590550286
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7027486269965193
force_error_y:  0.24901698590550286
sobolev_term:  0.28340779298860874
REWARD:  -1.235173405890631
--------
observation:  [-1.55360627  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         3.          2.69817793  1.        ]
timesteps:  119
action_pos:  -1.2
action_vel:  2.8669092655181885
fx_average:  2.017201686586191
fy_average:  0.15719714818393588
target_force_x:  2.0
target_

fx_average:  2.294523244476082
fy_average:  0.5419174020222843
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.2945232444760819
force_error_y:  0.5419174020222843
sobolev_term:  0.35014111126704234
REWARD:  -1.1865817577654085
--------
observation:  [-1.16500603  2.          0.         -1.          0.63494301 -1.2
 -0.66253867 -1.2         3.          3.          2.63494301]
timesteps:  133
action_pos:  -1.2
action_vel:  1.566082775592804
fx_average:  2.0816866485276635
fy_average:  0.5849749608275798
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.0816866485276635
force_error_y:  0.5849749608275798
sobolev_term:  0.37015158209527715
REWARD:  -1.0368131914505205
--------
observation:  [-1.41412579  2.          0.         -1.         -0.43391722 -0.66253867
 -1.2        -1.2         3.          2.63494301  1.56608278]
timesteps:  134
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.089741884068446
fy_average:  0.5255372365611065
target_force_x:  2.0
target_force

fx_average:  0.2570041913023322
fy_average:  0.8141856696699504
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.7429958086976678
force_error_y:  0.8141856696699504
sobolev_term:  0.27109306358742263
REWARD:  -2.828274541955041
--------
observation:  [-1.24347517  2.          0.         -1.         -0.03656918 -1.2
 -1.2        -1.2         1.63532275  3.          1.96343082]
timesteps:  146
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.527849913647196
fy_average:  0.3764668717046221
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.472150086352804
force_error_y:  0.3764668717046221
sobolev_term:  0.1808100087871683
REWARD:  -2.0294269668445946
--------
observation:  [-1.36828184  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.96343082  3.        ]
timesteps:  147
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9067427231754577
fy_average:  0.3156613239527902
target_force_x:  2.0
target_force_y:  0.0
force_error_

fx_average:  1.5595639433622681
fy_average:  0.17361035478067322
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4404360566377319
force_error_y:  0.17361035478067322
sobolev_term:  0.22060476859216652
REWARD:  -0.8346511800105716
--------
observation:  [-1.54826685  2.          0.         -1.         -0.77269632 -1.2
 -1.2        -1.2         3.          2.23495984  1.22730368]
timesteps:  161
action_pos:  -1.2
action_vel:  2.0407034754753113
fx_average:  1.550500516167944
fy_average:  0.19793023609352217
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.44949948383205607
force_error_y:  0.19793023609352217
sobolev_term:  0.2241826095289019
REWARD:  -0.8716123294544802
--------
observation:  [-1.13587515  2.          0.         -1.          0.04070348 -1.2
 -1.2        -1.2         2.23495984  1.22730368  2.04070348]
timesteps:  162
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6148425219763056
fy_average:  0.14447303238859255
target_force_x:  2.0
target_for

action_pos:  -1.2
action_vel:  1.7484134435653687
fx_average:  0.6308048569982737
fy_average:  0.12975454944946543
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3691951430017264
force_error_y:  0.12975454944946543
sobolev_term:  0.13934358184125756
REWARD:  -1.6382932742924494
--------
observation:  [-1.6235353   2.          0.         -1.         -0.25158656 -1.2
 -1.2        -1.2         3.          3.          1.74841344]
timesteps:  175
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.022558273752137
fy_average:  0.19359315190469212
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.977441726247863
force_error_y:  0.19359315190469212
sobolev_term:  0.16849390422814173
REWARD:  -1.3395287823806967
--------
observation:  [-1.21230218  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.74841344  3.        ]
timesteps:  176
action_pos:  -1.2
action_vel:  1.5667774081230164
fx_average:  0.9578627348922061
fy_average:  0.

fx_average:  1.5129952797787902
fy_average:  0.23410660329213878
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.4870047202212098
force_error_y:  0.23410660329213878
sobolev_term:  0.26720008395372746
REWARD:  -0.988311407467076
--------
observation:  [-1.55255041  2.          0.         -1.         -1.         -1.2
 -1.2        -1.2         2.9667688   3.          1.        ]
timesteps:  190
action_pos:  -1.2
action_vel:  2.010632634162903
fx_average:  1.8606018346138853
fy_average:  0.26792440060296685
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.13939816538611471
force_error_y:  0.26792440060296685
sobolev_term:  0.28848979153316373
REWARD:  -0.6958123575222452
--------
observation:  [-1.65860524  2.          0.         -1.          0.01063263 -1.2
 -1.2        -1.2         3.          1.          2.01063263]
timesteps:  191
action_pos:  -1.2
action_vel:  2.3640520572662354
fx_average:  1.6360557304883074
fy_average:  0.2357871621759215
target_force_x:  

fx_average:  1.8369947305438132
fy_average:  -0.04779769440085268
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.16300526945618676
force_error_y:  -0.04779769440085268
sobolev_term:  0.28562040981616443
REWARD:  -0.4964233736732039
--------
observation:  [ 0.03097707  2.          0.         -1.          1.         -1.2
 -0.19672766 -1.2         3.          3.          3.        ]
timesteps:  205
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.177075994322805
fy_average:  1.016348262726923
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.17707599432280485
force_error_y:  1.016348262726923
sobolev_term:  0.3948669599164147
REWARD:  -1.5882912169661425
--------
observation:  [-0.94995809  2.          0.         -1.          1.         -0.19672766
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  206
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_rew_

fx_average:  1.6332487925240171
fy_average:  0.6475865922677008
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.3667512074759829
force_error_y:  0.6475865922677008
sobolev_term:  0.3118746815965859
REWARD:  -1.3262124813402694
--------
observation:  [-1.34137372  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          1.51156402  3.        ]
timesteps:  218
action_pos:  -1.2
action_vel:  1.5812759399414062
fx_average:  1.8064152634497266
fy_average:  0.5947312564049257
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.19358473655027342
force_error_y:  0.5947312564049257
sobolev_term:  0.29178150739789294
REWARD:  -1.0800975003530922
--------
observation:  [-1.32324572  2.          0.         -1.         -0.41872406 -1.2
 -1.2        -1.2         1.51156402  3.          1.58127594]
timesteps:  219
action_pos:  -1.2
action_vel:  1.5223585367202759
fx_average:  1.6393617964632297
fy_average:  0.31490077763264224
target_force_x:  2.

fx_average:  0.9859073485617299
fy_average:  0.045904302648097674
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0140926514382702
force_error_y:  0.045904302648097674
sobolev_term:  0.08842503409209503
REWARD:  -1.148421988178463
--------
observation:  [-1.23831164  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          3.          3.        ]
timesteps:  233
action_pos:  -1.2
action_vel:  2.0534225702285767
fx_average:  1.2517660414684664
fy_average:  0.14910656398645084
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.7482339585315336
force_error_y:  0.14910656398645084
sobolev_term:  0.1438198408737058
REWARD:  -1.0411603633916902
--------
observation:  [-1.53987983  2.          0.         -1.          0.05342257 -1.2
 -1.2        -1.2         3.          3.          2.05342257]
timesteps:  234
action_pos:  -1.2
action_vel:  2.5416669249534607
fx_average:  1.588876906288767
fy_average:  0.21573745750264764
target_force_x: 

fx_average:  1.3886336929220393
fy_average:  1.2049825364550029
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6113663070779607
force_error_y:  1.2049825364550029
sobolev_term:  0.3513297998896869
REWARD:  -2.1676786434226503
--------
observation:  [-1.32481896  2.          0.         -1.          0.46209589 -1.17518735
 -1.2        -1.2         2.89954221  2.31424657  2.46209589]
timesteps:  246
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.443542808052375
fy_average:  1.093751196647892
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5564571919476251
force_error_y:  1.093751196647892
sobolev_term:  0.2336494731802778
REWARD:  -1.883857861775795
--------
observation:  [-1.25086505  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.31424657  2.46209589  3.        ]
timesteps:  247
action_pos:  -1.2
action_vel:  2.4095807671546936
fx_average:  1.37115197229657
fy_average:  0.31874893704351187
target_force_x:  2.0
target_force_y

fx_average:  0.9991922706453101
fy_average:  0.12746146913304338
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.00080772935469
force_error_y:  0.12746146913304338
sobolev_term:  0.20421485809177087
REWARD:  -1.3324840565795042
--------
observation:  [-1.29917955  2.          0.         -1.          0.77824676 -1.2
 -1.04492397 -1.2         2.42403594  3.          2.77824676]
timesteps:  11
action_pos:  -1.2
action_vel:  1.1275678873062134
fx_average:  0.7294731563982646
fy_average:  0.112681412942432
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.2705268436017354
force_error_y:  0.112681412942432
sobolev_term:  0.21854631337252828
REWARD:  -1.6017545699166957
--------
observation:  [-1.50055331  2.          0.         -1.         -0.87243211 -1.04492397
 -1.2        -1.2         3.          2.77824676  1.12756789]
timesteps:  12
action_pos:  -1.2
action_vel:  1.4148627519607544
fx_average:  1.0656479551535134
fy_average:  0.07750669332017095
target_force_x: 

fx_average:  0.6829960997621538
fy_average:  0.8514328127426165
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.3170039002378462
force_error_y:  0.8514328127426165
sobolev_term:  0.21797260659805903
REWARD:  -2.386409319578522
--------
observation:  [-1.31954399  2.          0.         -1.          0.88913071 -1.2
 -1.2        -1.2         3.          2.24867725  2.88913071]
timesteps:  24
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.298448383445929
fy_average:  -0.0010589374541561606
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.701551616554071
force_error_y:  -0.0010589374541561606
sobolev_term:  0.14337815581218338
REWARD:  -0.8459887098204105
--------
observation:  [-1.38487209  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         2.24867725  2.88913071  3.        ]
timesteps:  25
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.5376324589587833
fy_average:  0.02983525954171417
target_force_x:  2.0
target_force_y:  0.0
forc

fx_average:  0.4794466950936775
fy_average:  1.387308286577596
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.5205533049063225
force_error_y:  1.387308286577596
sobolev_term:  0.39036049708414733
REWARD:  -3.2982220885680658
--------
observation:  [-1.21763333  2.          0.         -1.         -0.46388257 -1.2
 -1.2        -1.2         1.17360657  3.          1.53611743]
timesteps:  39
action_pos:  -1.2
action_vel:  2.743022859096527
fx_average:  0.5813759149607449
fy_average:  1.3597338984273133
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.418624085039255
force_error_y:  1.3597338984273133
sobolev_term:  0.40127151604499944
REWARD:  -3.179629499511568
--------
observation:  [-1.25809564  2.          0.         -1.          0.74302286 -1.2
 -1.2        -1.2         3.          1.53611743  2.74302286]
timesteps:  40
action_pos:  -1.2
action_vel:  2.2498519122600555
fx_average:  0.18082144968585395
fy_average:  1.211789310481388
target_force_x:  2.0
target

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9604218495015446
fy_average:  0.6483693235646537
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0395781504984554
force_error_y:  0.6483693235646537
sobolev_term:  0.2664709064942967
REWARD:  -1.9544183805574058
--------
observation:  [-1.2234479   2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.72014481  3.        ]
timesteps:  53
action_pos:  -1.2
action_vel:  1.8815107345581055
fx_average:  1.4263325488138032
fy_average:  0.5981965842934994
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5736674511861968
force_error_y:  0.5981965842934994
sobolev_term:  0.29129353691735943
REWARD:  -1.4631575723970558
--------
observation:  [-1.53167993  2.          0.         -1.         -0.11848927 -1.2
 -1.2        -1.2         2.72014481  3.          1.88151073]
timesteps:  54
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.6551801063437666
fy_average:  0.5920530187823695
tar

action_pos:  -1.2
action_vel:  1.6792899370193481
fx_average:  1.7685438429621574
fy_average:  0.2867758014225728
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.2314561570378426
force_error_y:  0.2867758014225728
sobolev_term:  0.1911881622749529
REWARD:  -0.7094201207353683
--------
observation:  [-1.11320285  2.          0.         -1.         -0.32071006 -1.2
 -1.2        -1.2         1.81497496  1.71062732  1.67928994]
timesteps:  68
action_pos:  -1.2
action_vel:  2.19235897064209
fx_average:  1.0227623476008352
fy_average:  0.346513394251075
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9772376523991648
force_error_y:  0.346513394251075
sobolev_term:  0.17065539145244824
REWARD:  -1.494406438102688
--------
observation:  [-1.17414667  2.          0.         -1.          0.19235897 -1.2
 -1.2        -1.2         1.71062732  1.67928994  2.19235897]
timesteps:  69
action_pos:  -1.2
action_vel:  2.7414669394493103
fx_average:  1.2137077795708047
fy_average

fx_average:  0.38411754098812984
fy_average:  1.4832125158252372
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.61588245901187
force_error_y:  1.4832125158252372
sobolev_term:  0.5535778134990981
REWARD:  -3.652672788336205
--------
observation:  [-1.62169962  2.          0.         -1.         -0.87242103 -1.2
 -1.2        -1.2         3.          2.52125573  1.12757897]
timesteps:  83
action_pos:  -1.2
action_vel:  2.3535079061985016
fx_average:  1.3560253237634488
fy_average:  0.6900873284120175
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6439746762365512
force_error_y:  0.6900873284120175
sobolev_term:  0.3703133704570525
REWARD:  -1.7043753751056212
--------
observation:  [-1.37158758  2.          0.         -1.          0.35350791 -1.2
 -1.2        -1.2         2.52125573  1.12757897  2.35350791]
timesteps:  84
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 250          |
|    ep_rew

fx_average:  1.4225601103868026
fy_average:  0.1598346020615101
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5774398896131974
force_error_y:  0.1598346020615101
sobolev_term:  0.46060905245843037
REWARD:  -1.197883544133138
--------
observation:  [-1.22641206  2.          0.         -1.          1.         -0.36724334
 -1.2        -1.2         1.87434417  3.          3.        ]
timesteps:  96
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.4561573569424942
fy_average:  0.09907016816591467
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5438426430575058
force_error_y:  0.09907016816591467
sobolev_term:  0.4574270435709793
REWARD:  -1.1003398547943997
--------
observation:  [-1.1397179  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  97
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.1958255144087304
fy_average:  0.05180414101804141
target_force_x:  2.0
target_force_y:  0.0
force_error_

fx_average:  1.1469150773016876
fy_average:  0.1950941227635233
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8530849226983124
force_error_y:  0.1950941227635233
sobolev_term:  0.19955486957899168
REWARD:  -1.2477339150408273
--------
observation:  [-1.24463873  2.          0.         -1.          0.62446994 -1.2
 -1.2        -1.2         3.          3.          2.62446994]
timesteps:  111
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9181867821720776
fy_average:  0.14809390269824219
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0818132178279223
force_error_y:  0.14809390269824219
sobolev_term:  0.1734013704928637
REWARD:  -1.403308491019028
--------
observation:  [-1.26198903  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.62446994  3.        ]
timesteps:  112
action_pos:  -1.2
action_vel:  1.117641806602478
fx_average:  1.0466187128030597
fy_average:  0.12842732964840464
target_force_x:  2.0
target_force_y

fx_average:  1.081387110699389
fy_average:  0.25621235339607984
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9186128893006109
force_error_y:  0.25621235339607984
sobolev_term:  0.1767606557295279
REWARD:  -1.3515858984262186
--------
observation:  [-1.36248147  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.13521248  3.        ]
timesteps:  124
action_pos:  -1.2
action_vel:  1.8925474286079407
fx_average:  1.1208055446420369
fy_average:  0.13565669109566186
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.8791944553579631
force_error_y:  0.13565669109566186
sobolev_term:  0.15928945307777473
REWARD:  -1.1741405995313996
--------
observation:  [-1.34817442  2.          0.         -1.         -0.10745257 -1.2
 -1.2        -1.2         2.13521248  3.          1.89254743]
timesteps:  125
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8209071109692267
fy_average:  0.10873590822414102
target_force_x:  2.0
target_forc

fx_average:  1.4380524904492455
fy_average:  0.8233015569161841
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.5619475095507545
force_error_y:  0.8233015569161841
sobolev_term:  0.33619374589414863
REWARD:  -1.7214428123610872
--------
observation:  [-1.25145636  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.          3.          3.        ]
timesteps:  139
action_pos:  -1.2
action_vel:  2.1593874096870422
fx_average:  1.597940695538367
fy_average:  0.8594441927466183
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.40205930446163296
force_error_y:  0.8594441927466183
sobolev_term:  0.35124691077468284
REWARD:  -1.6127504079829342
--------
observation:  [-1.5605887   2.          0.         -1.          0.15938741 -1.2
 -1.2        -1.2         3.          3.          2.15938741]
timesteps:  140
action_pos:  -1.2
action_vel:  2.4656078219413757
fx_average:  2.0854867250011466
fy_average:  0.8464834622071167
target_force_x:  2.0

fx_average:  1.8502285192822956
fy_average:  0.8883023610578871
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.1497714807177044
force_error_y:  0.8883023610578871
sobolev_term:  0.37738762798787867
REWARD:  -1.4154614697634702
--------
observation:  [-1.51783379  2.          0.         -1.          0.25048327 -1.2
 -1.2        -1.2         3.          3.          2.25048327]
timesteps:  152
action_pos:  -1.2
action_vel:  1.631602942943573
fx_average:  2.190822133052085
fy_average:  0.8889688106654586
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.19082213305208517
force_error_y:  0.8889688106654586
sobolev_term:  0.3780270423974314
REWARD:  -1.4578179861149752
--------
observation:  [-1.5118942   2.          0.         -1.         -0.36839706 -1.2
 -1.2        -1.2         3.          2.25048327  1.63160294]
timesteps:  153
action_pos:  -1.2
action_vel:  2.5114355087280273
fx_average:  1.7963100987880414
fy_average:  0.8888045878188962
target_force_x:  2.0
ta

action_pos:  -1.2
action_vel:  2.672030448913574
fx_average:  0.8408233400070634
fy_average:  0.04367767972921999
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.1591766599929367
force_error_y:  0.04367767972921999
sobolev_term:  0.0897577504832785
REWARD:  -1.2926120902054352
--------
observation:  [-1.33079817  2.          0.         -1.          0.67203045 -1.2
 -1.2        -1.2         3.          3.          2.67203045]
timesteps:  167
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.8698865613819089
fy_average:  0.04034231063950203
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.130113438618091
force_error_y:  0.04034231063950203
sobolev_term:  0.07808813491951848
REWARD:  -1.2485438841771115
--------
observation:  [-1.25287151  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         3.          2.67203045  3.        ]
timesteps:  168
action_pos:  -1.2
action_vel:  2.312463402748108
fx_average:  1.1295072646906819
fy_average:  0.05

action_pos:  -1.2
action_vel:  3.0
fx_average:  0.9581066487464394
fy_average:  0.30499366622718194
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.0418933512535606
force_error_y:  0.30499366622718194
sobolev_term:  0.16537860601904703
REWARD:  -1.5122656234997895
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  181
action_pos:  -1.2
action_vel:  2.3669625520706177
fx_average:  1.3266557618497081
fy_average:  0.3345003981180536
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.6733442381502919
force_error_y:  0.3345003981180536
sobolev_term:  0.1832819577017343
REWARD:  -1.19112659397008
--------
observation:  [-1.45600957  2.          0.         -1.          0.36696255 -1.2
 -1.2        -1.2         3.          3.          2.36696255]
timesteps:  182
action_pos:  -1.2
action_vel:  3.0
fx_average:  1.7396034140266032
fy_average:  0.12649434069036378
target_for

fx_average:  0.3936338366690073
fy_average:  0.30409769176489426
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.6063661633309927
force_error_y:  0.30409769176489426
sobolev_term:  0.14016182108518693
REWARD:  -2.050625676181074
--------
observation:  [-1.23201615  2.          0.         -1.          0.82215047 -1.2
 -1.2        -1.2         3.          3.          2.82215047]
timesteps:  196
action_pos:  -1.2
action_vel:  1.8149963021278381
fx_average:  1.0147794395639855
fy_average:  0.3196796407102739
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.9852205604360145
force_error_y:  0.3196796407102739
sobolev_term:  0.19716773108237948
REWARD:  -1.5020679322286679
--------
observation:  [-1.57705737  2.          0.         -1.         -0.1850037  -1.2
 -1.2        -1.2         3.          2.82215047  1.8149963 ]
timesteps:  197
action_pos:  -1.2
action_vel:  1.0
fx_average:  1.8330927355102058
fy_average:  0.1887541635635788
target_force_x:  2.0
target_force_

fx_average:  1.9973034210601122
fy_average:  1.0274746100405951
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -0.0026965789398878393
force_error_y:  1.0274746100405951
sobolev_term:  0.4630222231487732
REWARD:  -1.4931934121292563
--------
observation:  [-1.37555543  2.          0.         -1.         -0.62995291 -0.80403399
 -1.2        -1.2         2.70675093  1.87945169  1.37004709]
timesteps:  211
action_pos:  -1.2
action_vel:  3.0
fx_average:  2.002841871127112
fy_average:  0.9338333459599357
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  0.0028418711271118724
force_error_y:  0.9338333459599357
sobolev_term:  0.3865849679013023
REWARD:  -1.3232601849883499
--------
observation:  [-0.74615239  2.          0.         -1.          1.         -1.2
 -1.2        -1.2         1.87945169  1.37004709  3.        ]
timesteps:  212
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 250         |
|    ep_rew_me

fx_average:  0.522200145508105
fy_average:  0.029842266903342813
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.477799854491895
force_error_y:  0.029842266903342813
sobolev_term:  0.028794733008576847
REWARD:  -1.536436854403815
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  224
action_pos:  -1.2
action_vel:  3.0
fx_average:  0.4089609663484751
fy_average:  0.04008759229404822
target_force_x:  2.0
target_force_y:  0.0
force_error_x:  -1.591039033651525
force_error_y:  0.04008759229404822
sobolev_term:  0.015877786674099455
REWARD:  -1.6470044126196728
--------
observation:  [-1.2319786  2.         0.        -1.         1.        -1.2
 -1.2       -1.2        3.         3.         3.       ]
timesteps:  225
action_pos:  -1.2
action_vel:  2.2613015174865723
fx_average:  0.5738604968735076
fy_average:  0.12030813080718124
target_force_x:  2.0
target_force_y:  0.0
force_er