In [41]:
import gymnasium as gym
import qwop_gym  # This registers the environment
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback


In [42]:
print(dir(qwop_gym))
# Check available environments

print(gym.envs.registry.keys())  # Look for qwop entries

['QwopEnv', 'RecordWrapper', 'VerboseWrapper', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'all', 'envs', 'gymnasium', 'wrappers']
dict_keys(['CartPole-v0', 'CartPole-v1', 'MountainCar-v0', 'MountainCarContinuous-v0', 'Pendulum-v1', 'Acrobot-v1', 'phys2d/CartPole-v0', 'phys2d/CartPole-v1', 'phys2d/Pendulum-v0', 'LunarLander-v2', 'LunarLanderContinuous-v2', 'BipedalWalker-v3', 'BipedalWalkerHardcore-v3', 'CarRacing-v2', 'Blackjack-v1', 'FrozenLake-v1', 'FrozenLake8x8-v1', 'CliffWalking-v0', 'Taxi-v3', 'tabular/Blackjack-v0', 'tabular/CliffWalking-v0', 'Reacher-v2', 'Reacher-v4', 'Pusher-v2', 'Pusher-v4', 'InvertedPendulum-v2', 'InvertedPendulum-v4', 'InvertedDoublePendulum-v2', 'InvertedDoublePendulum-v4', 'HalfCheetah-v2', 'HalfCheetah-v3', 'HalfCheetah-v4', 'Hopper-v2', 'Hopper-v3', 'Hopper-v4', 'Swimmer-v2', 'Swimmer-v3', 'Swimmer-v4', 'Walker2d-v2', 'Walker2d-v3', 'Walker2d-v4', 'Ant-v2', 'Ant-v3', 'Ant-v4', '

In [43]:
# Create/recreate the QWOP environment
browser_path = "C:\\Program Files\\BraveSoftware\\Brave-Browser\\Application\\brave.exe"
driver_path = "C:\\Program Files\\chromedriver-win64\\chromedriver-win64\\chromedriver.exe"

# Close existing environment if it exists
try:
    env.close()
    print("Closed existing environment")
except:
    pass

# Create new environment with visualization options
env = qwop_gym.QwopEnv(
    browser=browser_path, 
    driver=driver_path,
    stat_in_browser=True,  # Show statistics in browser
    game_in_browser=True,  # Show the game (default)
    auto_draw=True         # Automatically render each frame
)
print("Environment created successfully!")
print("Check your Brave browser - the QWOP game should be visible!")

Closed existing environment
Environment created successfully!
Check your Brave browser - the QWOP game should be visible!
Environment created successfully!
Check your Brave browser - the QWOP game should be visible!


In [44]:
# Test environment interaction with proper qwop-gym API
import time

# Action space mapping (for 16 actions - full action set)
action_map = {
    0: "none",
    1: "Q",
    2: "W", 
    3: "O",
    4: "P",
    5: "Q+W",
    6: "Q+O",
    7: "Q+P",
    8: "W+O",
    9: "W+P",
    10: "O+P",
    11: "Q+W+O",
    12: "Q+W+P",
    13: "Q+O+P",
    14: "W+O+P",
    15: "Q+W+O+P"
}

print(f"Environment action space size: {env.action_space.n}")
print(f"Action mapping: {action_map}\n")

# Reset environment - returns (observation, info) tuple
observation, info = env.reset()
print(f"Initial observation shape: {observation.shape}")
print(f"Initial observation (first 10 values): {observation[:10]}")
print(f"Initial info: {info}\n")

# Take some random actions
total_reward = 0
for step in range(300):
    # Random action (integer from 0 to action_space.n-1)
    action = env.action_space.sample()
    
    print(f"\nStep {step + 1}: Action {action} = '{action_map.get(action, 'UNKNOWN')}'")
    
    # Step environment - returns (obs, reward, terminated, truncated, info)
    observation, reward, terminated, truncated, info = env.step(action)
    total_reward += reward
    
    print(f"  Reward: {reward:.4f}, Total: {total_reward:.4f}")
    print(f"  Distance: {info['distance']:.2f}m, Time: {info['time']:.2f}s")
    print(f"  Terminated: {terminated}, Truncated: {truncated}")
    
    if terminated or truncated:
        print(f"\n{'SUCCESS!' if info.get('is_success') else 'FAILED!'} Episode finished after {step + 1} steps!")
        print(f"Final distance: {info['distance']:.2f}m")
        break
    
    time.sleep(0.05)  # Small delay to watch

print(f"\nFinal total reward: {total_reward:.4f}")
env.close()

Environment action space size: 16
Action mapping: {0: 'none', 1: 'Q', 2: 'W', 3: 'O', 4: 'P', 5: 'Q+W', 6: 'Q+O', 7: 'Q+P', 8: 'W+O', 9: 'W+P', 10: 'O+P', 11: 'Q+W+O', 12: 'Q+W+P', 13: 'Q+O+P', 14: 'W+O+P', 15: 'Q+W+O+P'}

Initial observation shape: (60,)
Initial observation (first 10 values): [-0.9763941  -0.1822861  -0.20857537 -0.49996138 -0.3929341  -0.9737969
 -0.55740976  0.01065133 -0.50024533 -0.39300483]
Initial info: {'time': 0.0065733334, 'distance': 0.25110978, 'avgspeed': 38.201283, 'is_success': False}


Step 1: Action 3 = 'O'
  Reward: 0.1555, Total: 0.1555
  Distance: 0.25m, Time: 0.01s
  Terminated: False, Truncated: False

Step 2: Action 1 = 'Q'
  Reward: -0.0305, Total: 0.1250
  Distance: 0.25m, Time: 0.01s
  Terminated: False, Truncated: False

Step 3: Action 0 = 'none'
  Reward: -0.0306, Total: 0.0944
  Distance: 0.25m, Time: 0.02s
  Terminated: False, Truncated: False

Step 4: Action 8 = 'W+O'
  Reward: -0.0262, Total: 0.0682
  Distance: 0.26m, Time: 0.02s
  Termi