In [1]:
import gymnasium as gym
import numpy as np
from ot2_gym_wrapper import OT2Env

def test_environment():
    """
    Test the OT2Env environment by running it for 1,000 steps or until termination.
    Logs key metrics such as cumulative reward, steps, and termination status.
    """
    # Initialize the environment
    environment = OT2Env(render=False, max_steps=1000)

    # Reset the environment and retrieve initial observation and info
    observation, info = environment.reset()
    print("Initial Observation:", observation)

    cumulative_reward = 0

    for step_count in range(1000):
        # Sample a random action from the action space
        action = environment.action_space.sample()

        # Take a step in the environment
        observation, reward, done, truncated, info = environment.step(action)

        # Accumulate the reward
        cumulative_reward += reward

        # Log step information
        print(f"Step {step_count + 1}:")
        print(f"  Action: {action}")
        print(f"  Observation: {observation}")
        print(f"  Reward: {reward}")
        print(f"  Done: {done}")
        print(f"  Truncated: {truncated}")

        # Break the loop if the episode terminates or is truncated
        if done or truncated:
            print("Episode ended early.")
            break

    # Log final results
    print(f"Cumulative Reward: {cumulative_reward}")
    print(f"Total Steps: {step_count + 1}")

    # Close the environment
    environment.close()

# Entry point: run the smoke test when this code is executed directly
# (the guard is skipped when the code is imported as a module).
if __name__ == "__main__":
    test_environment()


Initial Observation: [ 0.073       0.0895      0.1195     -0.12632658  0.02507938  0.284291  ]
Step 1:
  Action: [ 0.09548919  0.7131886  -0.81715316]
  Observation: [ 0.0734      0.0901      0.1195     -0.12632658  0.02507938  0.284291  ]
  Reward: 0.55021585676661
  Done: False
  Truncated: False
Step 2:
  Action: [ 0.56454253 -0.3930701  -0.28379858]
  Observation: [ 0.0743      0.0901      0.1195     -0.12632658  0.02507938  0.284291  ]
  Reward: 0.549092987972798
  Done: False
  Truncated: False
Step 3:
  Action: [-0.1932973  -0.11090175  0.549308  ]
  Observation: [ 0.0747      0.0896      0.1205     -0.12632658  0.02507938  0.284291  ]
  Reward: 0.549817800648105
  Done: False
  Truncated: False
Step 4:
  Action: [ 0.47786552 -0.73156583  0.31661022]
  Observation: [ 0.0755      0.0886      0.1218     -0.12632658  0.02507938  0.284291  ]
  Reward: 0.5505351655976073
  Done: False
  Truncated: False
Step 5:
  Action: [-0.7057638   0.61124545 -0.49125466]
  Observation: [ 0.0759  