In [None]:
# Want to set up the dev branch; small change.
import gymnasium as gym
from pathlib import Path
from gymnasium.wrappers import RecordVideo

In [76]:
# Root folder for everything this notebook writes. The path is relative to the
# kernel's working directory; each "../" climbs one level, so this resolves two
# directories above it.
docs_path = Path("../../documentation/cartpole/random-baseline")

# Locations of the individual artefacts underneath the root folder.
report_file = docs_path.joinpath("random_baseline.md")
video_dir = docs_path.joinpath("videos")
graphs_dir = docs_path.joinpath("graphs")

# Create the folders up front; exist_ok=True keeps re-running this cell harmless.
docs_path.mkdir(parents=True, exist_ok=True)
video_dir.mkdir(parents=True, exist_ok=True)
graphs_dir.mkdir(parents=True, exist_ok=True)

In [77]:
# Build CartPole with off-screen ("rgb_array") rendering and wrap it so that
# episodes are recorded into video_dir. The trigger ignores its argument and
# always answers True, i.e. every episode is selected for recording.
env = RecordVideo(
    gym.make("CartPole-v1", render_mode="rgb_array"),
    video_folder=str(video_dir),
    name_prefix="cartpole_random_baseline",
    episode_trigger=lambda _episode_id: True,
)

In [78]:
# Start a fresh episode; the fixed seed makes the starting observation repeatable.
observation, info = env.reset(seed=42)

# Summarise the environment in a single print call (one line per fact):
#   - action space: Discrete(2), the cart can only be pushed left or right
#   - observation space: a Box of 4 values, essentially all the agent can see
#   - the observation returned by reset()
#   - the step limit taken from the environment spec
print(
    f"Action space: {env.action_space}",
    f"Observation space: {env.observation_space}",
    f"Starting observation: {observation}",
    f"maxiumum number of steps per episode: {env.spec.max_episode_steps}",
    sep="\n",
)

Action space: Discrete(2)
Observation space: Box([-4.8               -inf -0.41887903        -inf], [4.8               inf 0.41887903        inf], (4,), float32)
Starting observation: [ 0.0273956  -0.00611216  0.03585979  0.0197368 ]
maxiumum number of steps per episode: 500


In [79]:
# Bookkeeping for the episode loop: steps taken so far, reward accumulated so
# far, and the flag that stops the loop once the episode has ended.
step, total_reward, episode_over = 0, 0, False

In [80]:
# Human-readable names for the four observation entries, in the order in which
# the loop below pairs them with the observation values.
labels = ["cart position", "cart velocity", "pole angle", "pole angular velocity"]

# Play a single episode with randomly sampled actions, logging every step.
# Relies on `env`, `step`, `total_reward` and `episode_over` from earlier cells.
# The try/finally makes sure env.close() runs even when the cell is interrupted
# or env.step() raises, so the recording environment is never left open.
try:
    while not episode_over:
        step += 1
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)

        total_reward += reward
        # The episode is over on either signal: terminated or truncated.
        episode_over = terminated or truncated

        print(f"Step {step}:")
        print(f"Action taken: {action}")

        for label, observe in zip(labels, observation):
            print(f"{label}: {observe}")

        print(f"Reward: {reward}")
        print(f"Terminated: {terminated}, Truncated: {truncated}")
        print("-" * 50)

    print(f"Episode finished! Total reward: {total_reward}")
finally:
    env.close()

Step 1:
Action taken: 0
cart position: 0.02727336250245571
cart velocity: -0.20172953605651855
pole angle: 0.036254528909921646
pole angular velocity: 0.32351475954055786
Reward: 1.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 2:
Action taken: 1
cart position: 0.02323877066373825
cart velocity: -0.007142078131437302
pole angle: 0.04272482171654701
pole angular velocity: 0.042481862008571625
Reward: 1.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 3:
Action taken: 0
cart position: 0.023095929995179176
cart velocity: -0.20284982025623322
pole angle: 0.043574459850788116
pole angular velocity: 0.34833285212516785
Reward: 1.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 4:
Action taken: 1
cart position: 0.019038932397961617
cart velocity: -0.008373845368623734
pole angle: 0.050541117787361145
pole angular velocity: 0.0697026327252388
Reward: 1.0
Term

In [81]:
# Static markdown fragments describing the observation vector and the ways an
# episode can end. They are embedded verbatim in the report written below.
obs_explanation = """\
**Observation vector (4 values):**
1. **Cart Position (m)** — horizontal position on the track (≈ -4.8 to +4.8).
2. **Cart Velocity (m/s)** — how fast the cart moves (unbounded float in practice).
3. **Pole Angle (rad)** — tilt of the pole relative to vertical (≈ -0.4189 to +0.4189 rad ≈ ±24°).
4. **Pole Angular Velocity (rad/s)** — how fast the pole is rotating (unbounded float in practice).
"""

failure_conditions = """\
**Episode ends when (termination/truncation):**
- **Pole tilt exceeds ±0.4189 rad (~±24°)** → `terminated = True`
- **Cart position leaves track bounds (≈ ±4.8 m)** → `terminated = True`
- **Time limit of 500 steps is reached** → `truncated = True`
"""

# Write the markdown report. The encoding is pinned to UTF-8 because the text
# above contains non-ASCII characters (—, ≈, ±, °, →); without it open() falls
# back to the locale's default encoding, which may be unable to encode them.
with open(report_file, "w", encoding="utf-8") as f:
    f.write("# SCRUM-15: Researching Cartpole test write\n\n")
    f.write("## Environment Details\n")
    f.write(f"- Action space: {env.action_space}\n")
    f.write(f"- Observation space: {env.observation_space}\n")
    f.write(f"- Maximum steps per episode: {env.spec.max_episode_steps}\n\n")

    f.write("## Observation Meaning\n")
    f.write(obs_explanation + "\n")

    f.write("## Failure Conditions\n")
    f.write(failure_conditions + "\n")

    f.write("## Example Run\n")
    # `observation` was last assigned inside the episode loop, so at this point
    # it holds the last observation of the run, not the one returned by
    # env.reset(); the label says so accordingly.
    f.write(f"- Final observation: {observation.tolist()}\n")
    f.write(f"- Total reward: {total_reward}\n")
