In [2]:
import gymnasium as gym
# TimeLimit is taken from Gymnasium so the wrapper comes from the same package
# as the environments; every other cell in this notebook uses `gymnasium` only.
from gymnasium.wrappers import TimeLimit

import matplotlib.pyplot as plt

In [3]:
# Display the ids of every environment currently present in Gymnasium's registry.
gym.envs.registry.keys()

dict_keys(['CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v1', 'Acrobot-v1', 'phys2d/CartPole-v0', 'phys2d/CartPole-v1', 'phys2d/Pendulum-v0', 'LunarLander-v2', 'LunarLanderContinuous-v2', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3', 'CarRacing-v2', 'Blackjack-v1', 'FrozenLake-v1', 'FrozenLake8x8-v1', 'CliffWalking-v0', 'Taxi-v3', 'tabular/Blackjack-v0', 'tabular/CliffWalking-v0', 'Reacher-v2', 'Reacher-v4', 'Pusher-v2', 'Pusher-v4', 'InvertedPendulum-v2', 'InvertedPendulum-v4', 'InvertedDoublePendulum-v2', 'InvertedDoublePendulum-v4', 'HalfCheetah-v2', 'HalfCheetah-v3', 'HalfCheetah-v4', 'Hopper-v2', 'Hopper-v3', 'Hopper-v4', 'Swimmer-v2', 'Swimmer-v3', 'Swimmer-v4', 'Walker2d-v2', 'Walker2d-v3', 'Walker2d-v4', 'Ant-v2', 'Ant-v3', 'Ant-v4', 'Humanoid-v2', 'Humanoid-v3', 'Humanoid-v4', 'HumanoidStandup-v2', 'HumanoidStandup-v4', 'GymV26Environment-v0', 'GymV21Environment-v0', 'Adventure-v0', 'AdventureDeterministic-v0', 'AdventureNoFrameskip-v0

## The classic “agent-environment loop” used in Gymnasium (simplified)
![Diagram of the agent-environment loop](img.png "Agent-environment loop")

In [33]:
import gymnasium as gym
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('TkAgg')  # or 'Qt5Agg' if you prefer Qt

# --- Configuration -----------------------------------------------------------
max_steps = 100     # per-episode step cap enforced by the TimeLimit wrapper
total_steps = 200   # total environment steps to animate, across all episodes
seed = 0            # fixes the initial state and the sampled actions for re-runs
min_action = -1.0   # not read anywhere in this cell; kept for other cells
max_action = 1.0    # not read anywhere in this cell; kept for other cells

env = gym.make('Pendulum-v1', render_mode='rgb_array')
env = gym.wrappers.TimeLimit(env, max_episode_steps=max_steps)

actions_arr = []       # every sampled action, in order (read by later cells)
episode_returns = []   # summed reward of each finished episode
rewards = 0            # running return of the episode currently in progress

try:
    # Seed only the first reset; later resets continue the same RNG stream.
    obs, info = env.reset(seed=seed)
    env.action_space.seed(seed)

    plt.ion()  # Turn on interactive mode
    fig, ax = plt.subplots(figsize=(10, 6))
    img = ax.imshow(env.render())
    step_text = ax.text(5, 17, '', color='white', fontsize=12,
                        bbox=dict(facecolor='blue', alpha=0.8))

    for step in range(total_steps):
        # Random policy: draw an action uniformly from the action space.
        action = env.action_space.sample()
        actions_arr.append(action)
        obs, reward, terminated, truncated, info = env.step(action)

        # Update the existing image artist instead of re-creating the figure.
        frame = env.render()
        img.set_data(frame)
        step_text.set_text(f'Step: {step + 1}')
        fig.canvas.draw()
        fig.canvas.flush_events()

        rewards += reward
        print(f'step {step}, reward: {reward}')
        if terminated or truncated:
            print(f"terminated {terminated}, truncated {truncated} reward {rewards}")
            # Keep the finished episode's return before the accumulator is zeroed.
            episode_returns.append(rewards)
            obs, info = env.reset()
            print(info)
            rewards = 0
finally:
    # Release the environment and leave interactive mode even if a step or a
    # draw call raised part-way through the rollout.
    env.close()
    plt.ioff()  # Turn off interactive mode

plt.show()  # Keep the window open after the animation finishes
# plt.close()

step 0, reward: -0.09912636435753835
step 1, reward: -0.12933191321329895
step 2, reward: -0.2043503219652053
step 3, reward: -0.246110059335057
step 4, reward: -0.34724687847192137
step 5, reward: -0.5670429131255437
step 6, reward: -0.7069082685571273
step 7, reward: -1.1225908110244094
step 8, reward: -1.5251746613564137
step 9, reward: -2.258701817501286
step 10, reward: -3.2959199839078224
step 11, reward: -4.909162939115977
step 12, reward: -6.990447660811445
step 13, reward: -9.021630365265136
step 14, reward: -11.472768561362267
step 15, reward: -13.759077594468609
step 16, reward: -14.573706279327249
step 17, reward: -12.303801410571047
step 18, reward: -10.25019556524649
step 19, reward: -8.074191471236217
step 20, reward: -6.188607415303987
step 21, reward: -4.623711250264472
step 22, reward: -3.333432300651243
step 23, reward: -2.4505095042622997
step 24, reward: -1.7842610850455514
step 25, reward: -1.1478288889743815
step 26, reward: -0.7738586807198305
step 27, reward: -

In [5]:
# Show the running reward accumulator left by the rollout loop. That loop sets it
# back to 0 each time an episode ends, so this is the sum for the episode still
# in progress when the loop stopped, not the return of a completed episode.
print(rewards)

0.0


In [6]:
import numpy as np

# Number of equal-width bins used when the recorded actions are continuous.
n_action_bins = 10

actions_arr = np.array(actions_arr)
if np.issubdtype(actions_arr.dtype, np.integer):
    # Discrete action ids: an exact per-action frequency is meaningful.
    unique, counts = np.unique(actions_arr, return_counts=True)
    # .tolist() yields native ints so the printed dict stays readable.
    action_counts = dict(zip(unique.tolist(), counts.tolist()))
else:
    # Continuous actions (e.g. samples from a Box space) are almost all distinct,
    # so counting unique values would give one key per sample. Bin them instead.
    counts, bin_edges = np.histogram(actions_arr, bins=n_action_bins)
    # Labels are written half-open; np.histogram also includes the right edge
    # in the last bin.
    unique = [f'[{lo:.2f}, {hi:.2f})' for lo, hi in zip(bin_edges[:-1], bin_edges[1:])]
    action_counts = dict(zip(unique, counts.tolist()))

print(f'counts : {action_counts} \ntotal {np.sum(counts)}')

counts : {2: 2, 3: 2, 4: 2, 6: 1, 7: 1, 8: 1, 9: 1, 10: 1, 11: 2, 13: 1, 14: 1, 15: 3, 16: 2} 
total 20


In [50]:
import os
import sys

# MuJoCo binaries location. Override with the MUJOCO_BIN_DIR environment
# variable instead of editing this cell; the fallback is the original path.
mujoco_bin_dir = os.environ.get("MUJOCO_BIN_DIR", "C:/Users/Click/.mujoco/mujoco210/bin")
# Guard the append so re-running the cell does not add duplicate entries.
if mujoco_bin_dir not in sys.path:
    sys.path.append(mujoco_bin_dir)

env = gym.make('Pendulum-v1', render_mode='rgb_array')
try:
    obs, info = env.reset()
    # Prints the whole action space object (bounds, shape, dtype), not a count
    # of discrete actions.
    print(f'action space shape : {env.action_space}')
    print(f'observation space shape : {env.observation_space.shape}')
    # NOTE(review): falls back to (-inf, inf) if this Gymnasium version does not
    # define `reward_range` on the environment — confirm against the installed version.
    reward_range = getattr(env, 'reward_range', (float('-inf'), float('inf')))
    print(f'reward range : {reward_range}')
    print(f'\nEnv specs : {env.spec}')
    print(f'\nEnv metadata : {env.metadata}')
finally:
    # Always release the environment, even if one of the lookups above raises.
    env.close()

action space shape : Box(-2.0, 2.0, (1,), float32)
observation space shape : (3,)
reward range : (-inf, inf)

Env specs : EnvSpec(id='Pendulum-v1', entry_point='gymnasium.envs.classic_control.pendulum:PendulumEnv', reward_threshold=None, nondeterministic=False, max_episode_steps=200, order_enforce=True, autoreset=False, disable_env_checker=False, apply_api_compatibility=False, kwargs={'render_mode': 'rgb_array'}, namespace=None, name='Pendulum', version=1, additional_wrappers=(), vector_entry_point=None)

Env metadata : {'render_modes': ['human', 'rgb_array'], 'render_fps': 30}


In [53]:
# NOTE(review): this call fails as written — the recorded TypeError shows that
# register() requires an `id` argument.
# TODO: pass the id (and remaining registration details) of the environment to register.
gym.register()

TypeError: register() missing 1 required positional argument: 'id'