In [1]:
import gymnasium as gym
import time

# Create an instance of the CartPole environment
# Set render_mode to 'human' to render the environment in a window
env = gym.make('CartPole-v1', render_mode='rgb_array') # Set render_mode to 'rgb_array' to render the environment as an image array

# Reset the environment and get the initial state
observation, info = env.reset(seed=42) # Set seed to get the same initial state every time

# Run the simulation for 1000 steps
for _ in range(1000):   
    #if you want to render the environment as an image array use the following code
    #img = env.render() # This will store the image array in the variable img instead of rendering it in a window

    # Take a random action by sampling from the action space
    action = env.action_space.sample()
    
    # Execute the action and get the next state, reward, and whether the episode is done. 
    # Terminated is True if the episode is done and False otherwise, Truncated is True if the episode was terminated because the time limit was reached.
    observation, reward, terminated, truncated, info = env.step(action)
    done = terminated or truncated

    # add a delay to slow down the env render
    # This is purely for visualization purposes, DO NOT use this when training!
    # time.sleep(0.05)
    
    # If the episode is done, reset the environment
    if done:
        state = env.reset()

In [2]:
# Observation and action space 
obs_space = env.observation_space
action_space = env.action_space
print("The observation space: {}".format(obs_space))
print("The action space: {}".format(action_space))


The observation space: Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)
The action space: Discrete(2)


In [3]:
import matplotlib.pyplot as plt 

# reset the environment and see the initial observation
obs = env.reset()
print("The initial observation is {}".format(obs))

# Sample a random action from the entire action space
random_action = env.action_space.sample()

# # Take the action and get the new observation space
new_obs, reward, term, trun, info = env.step(random_action)
print("The new observation is {}".format(new_obs))

The initial observation is (array([-0.03922591,  0.04160118, -0.0269786 , -0.04625874], dtype=float32), {})
The new observation is [-0.03839388  0.2370994  -0.02790378 -0.3473301 ]


In [4]:
import sys
sys.path.append('../OT2_Twin')
from sim_class import Simulation

pybullet build time: Oct 14 2023 15:59:43


In [5]:
x_range = (-0.187, 0.253)
y_range = (-0.1705, 0.2195)
z_range = (0.1195, 0.2895)
xyz_low = (x_range[0], y_range[0] ,z_range[0])
xyz_high = (x_range[1], y_range[1] ,z_range[1])

xyz_low*2

(-0.187, -0.1705, 0.1195, -0.187, -0.1705, 0.1195)

In [6]:
import numpy as np

obs = [0,1,2]
goal_position = [3,4,5]
reward = obs *2
reward

[0, 1, 2, 0, 1, 2]

In [7]:
np.concatenate([obs, goal_position])

array([0, 1, 2, 3, 4, 5])