## This Notebook is an initial scrap file to play around with Gymnasium's Car Racing environment. 

In [1]:
import gymnasium as gym
import environments.cartpole_dualpendulum

from stable_baselines3 import PPO, DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import BaseCallback


In [3]:
env = gym.make("DualCartPole-v4", render_mode="rgb_array")
print(env.observation_space.shape)  # Should print (6,)

# Play around with a custom reward function
class CustomReward(gym.RewardWrapper):
    """
    Custom reward function for the cart pole environment
    """
    def __init__(self, env):
        super().__init__(env)
    
    def reward(self, reward):
        # Get the current state
        state = self.unwrapped.state

        # Unpack the state
        x, x_dot, theta1, theta1_dot, theta2, theta2_dot = state

        # Reward
        reward = (
            1.0 - (x) - 0.01 * (x_dot) - 0.001
        )
        return reward


class RewardCallback(BaseCallback):
    def _on_step(self) -> bool:
        infos = self.locals.get("infos", [])
        for info in infos:
            if "episode" in info:
                print(f"Episode reward: {info['episode']['r']}")
        return True


# Set the wrapper
env = CustomReward(env)
env = Monitor(env)

# Create and train the PPO model
model = DQN("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=2000000, callback=RewardCallback())
model.save("dqn_cartpole")

# Delete and reload the model
del model

(6,)
Using cpu device
Wrapping the env in a DummyVecEnv.
Episode reward: 26.8724
Episode reward: 31.90878
Episode reward: 67.417238
Episode reward: 16.943892
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36       |
|    ep_rew_mean      | 35.8     |
|    exploration_rate | 0.999    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 5725     |
|    time_elapsed     | 0        |
|    total_timesteps  | 144      |
| train/              |          |
|    learning_rate    | 0.0001   |
|    loss             | 0.499    |
|    n_updates        | 10       |
----------------------------------
Episode reward: 21.947922
Episode reward: 34.930441
Episode reward: 48.907272
Episode reward: 20.960212
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.9     |
|    ep_rew_mean      | 33.7     |
|    exploration_rate | 0.997    |
| time/               |          |
|  

In [2]:
env = gym.make("DualCartPole-v4", render_mode="human")  # "human" for real-time visualization
model = DQN.load("dqn_cartpole")

episodes = 0
while episodes < 10:
    obs, _ = env.reset()

    # Override internal state
    env.unwrapped.state = [0.5, 0.0, 0.0, 0.0, 0.0, 0.0]

    # Also update observation if needed
    obs = env.unwrapped.state

    done = False
    while not done:
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
    episodes += 1

env.close()

AttributeError: 'list' object has no attribute 'shape'

: 