In [11]:
import gymnasium as gym
from gymnasium.envs.mujoco.half_cheetah_v4 import HalfCheetahEnv
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

# Directory name used as the prefix for saved model parameter files.
PARAM_DIR = "params"

def create_custom_env(render_mode=None, **kwargs):
    """
    Creates a HalfCheetah (MuJoCo) environment.

    :param render_mode: Render mode passed to ``HalfCheetahEnv`` (``None`` by default).
    :param kwargs: Extra keyword arguments forwarded unchanged to ``HalfCheetahEnv``.
        Accepted so that callers forwarding their own ``**kwargs`` (as
        ``train_ppo_on_map`` does) do not fail with a ``TypeError``.
    :return: A newly constructed ``HalfCheetahEnv`` instance.
    """
    # NOTE(review): the env class is instantiated directly instead of going through
    # gym.make(), so any wrappers gym.make() would normally add (e.g. an episode
    # time limit) are presumably absent -- confirm this is intended.
    env = HalfCheetahEnv(render_mode=render_mode, **kwargs)
    return env

def train_ppo_on_map(total_timesteps=10000, pretrained_model_path=None, render_mode=None, **kwargs):
    """
    Trains a PPO agent on the environment produced by ``create_custom_env``.

    :param total_timesteps: Total timesteps for training.
    :param pretrained_model_path: Optional path of a saved PPO model. When given, that
        model is loaded and training continues from it; otherwise a new ``MlpPolicy``
        model is created.
    :param render_mode: Render mode forwarded to ``create_custom_env``.
    :param kwargs: Extra keyword arguments forwarded to ``create_custom_env``.
    :return: Trained PPO model.
    """
    def _make_env():
        # Build a fresh environment on every call. make_vec_env expects a factory;
        # returning one pre-built instance from a lambda would hand the same object
        # to every sub-environment if n_envs were ever raised above 1.
        return create_custom_env(render_mode=render_mode, **kwargs)

    # Vectorize the environment
    env = make_vec_env(_make_env, n_envs=1)

    # Initialize the PPO model
    if pretrained_model_path is not None:
        model = PPO.load(pretrained_model_path, env)
    else:
        model = PPO("MlpPolicy", env, verbose=1)

    # Train the model
    model.learn(total_timesteps=total_timesteps)

    return model



# Train the PPO agent on the baseline HalfCheetah environment
# trained_model = train_ppo_on_map(total_timesteps=50000)

# Save the model
# trained_model.save(f"{PARAM_DIR}/ppo_frozenlake_baseline")


## Using Modified Gym Environments

In [21]:
from gym_extensions.continuous.mujoco.modified_half_cheetah import ModifiedHalfCheetahEnv

def create_custom_env(render_mode=None, modified_env=True):
    """
    Creates a HalfCheetah environment, by default the modified variant.

    :param render_mode: Render mode. Only used when ``modified_env`` is False, where it
        is passed to ``HalfCheetahEnv``; ``ModifiedHalfCheetahEnv`` is constructed
        without arguments.
    :param modified_env: If True (default), build ``ModifiedHalfCheetahEnv``;
        otherwise build the stock ``HalfCheetahEnv``.
    :return: The created environment instance.
    """
    if not modified_env:
        # Honour the flag instead of silently ignoring it: fall back to the
        # unmodified environment imported at the top of the notebook.
        return HalfCheetahEnv(render_mode=render_mode)

    # NOTE(review): whether ModifiedHalfCheetahEnv accepts a render_mode argument is
    # not visible here, so it is still constructed with its defaults -- confirm.
    print("Creating Modified environment")
    env = ModifiedHalfCheetahEnv()
    return env

# Train a fresh PPO model (no pretrained weights, no rendering) for 50k timesteps,
# using the create_custom_env defined in this cell.
train_ppo_on_map(
    total_timesteps=50_000,
    pretrained_model_path=None,
    render_mode=None,
)

MujocoDependencyError: To use MuJoCo, you need to either populate ~/.mujoco/mjkey.txt and ~/.mujoco/mjpro131, or set the MUJOCO_PY_MJKEY_PATH and MUJOCO_PY_MJPRO_PATH environment variables appropriately. Follow the instructions on https://github.com/openai/mujoco-py for where to obtain these.