 #### Import necessary dependencies

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
import os
# Choose the MuJoCo rendering backend through the MUJOCO_GL environment
# variable; this is set before any environment is created further down.
# NOTE(review): "egl" is presumably picked for headless/off-screen rendering;
# confirm it suits the machine this notebook runs on.
os.environ["MUJOCO_GL"] = "egl"

#### Create environment and train model

In [None]:
# Build the Ant environment, requesting the "rgb_array" render mode.
env_name = "Ant-v5"
make_kwargs = {"render_mode": "rgb_array"}
env = gym.make(env_name, **make_kwargs)

In [None]:
# Load the TensorBoard notebook extension and point it at ./Ant_tensorboard/,
# the same directory that is passed as `tensorboard_log` when the PPO model
# is built.
%load_ext tensorboard
%tensorboard --logdir ./Ant_tensorboard/

In [None]:
from stable_baselines3.common.env_util import make_vec_env

# `n_envs` and `n_timesteps` are RL Zoo hyperparameter-file keys, not PPO
# constructor arguments; passing them to PPO(...) raises a TypeError.
# The number of parallel environments is a property of the vectorized env,
# and the training budget is given to `learn()`.
N_ENVS = 3
# NOTE(review): the original listed n_timesteps=1000000 next to
# learn(total_timesteps=2000000); the value from the learn() call is kept.
TOTAL_TIMESTEPS = 2_000_000

# Separate, non-rendering copies of the task used only for collecting rollouts.
train_env = make_vec_env(env_name, n_envs=N_ENVS)

model = PPO(
    "MlpPolicy",
    train_env,
    verbose=1,
    batch_size=32,
    n_steps=512,
    gamma=0.99,
    learning_rate=1.90609e-05,
    ent_coef=4.9646e-07,
    clip_range=0.1,
    n_epochs=10,
    gae_lambda=0.8,
    max_grad_norm=0.6,
    vf_coef=0.677239,
    tensorboard_log="./Ant_tensorboard",
)

# Train the model here.
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Written as model_Ant.zip; the validation cell loads it back by this name.
model.save("model_Ant")

#### Validate model by having it interact with a live MuJoCo environment

In [None]:
# Load model here.
# `load` is a classmethod that builds and returns a new model instead of
# updating the instance it is called on, so its result must be assigned;
# otherwise the saved weights are silently ignored.
model = PPO.load("model_Ant")

In [None]:
from gymnasium.wrappers import RecordVideo
import matplotlib.pyplot as plt

# Roll out one episode with the trained policy while recording it to video
# in the current directory.
env_name = "Ant-v5"
env = gym.make(env_name, render_mode="rgb_array")
env = RecordVideo(env, "./")

episode_reward = 0
max_ep_timesteps = 1000000  # upper bound only; the episode normally ends sooner

try:
    # reset() returns (observation, info); only the observation is needed.
    obs, _ = env.reset()

    for t in range(max_ep_timesteps):
        print(f"timestep: {t}")

        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)

        episode_reward += reward

        print(f"action: {action}")
        print(f"episode reward: {episode_reward}")
        print((reward, terminated, truncated, info))

        # An episode is over when it terminates OR when it is truncated
        # (e.g. by a time limit). Checking only `terminated` kept stepping an
        # already-finished environment.
        if terminated or truncated:
            break
finally:
    # Always close the wrapper so the recording is finalized even if the
    # rollout raises part-way through.
    env.close()