### XRL Baby Steps Notebook

In this notebook, we take first baby steps toward combining three explainable
reinforcement learning (XRL) methods in order to explain deep RL agents:

- SVERL
- Group-SHAPLEY
- Shapley Explainability on Data Manifold

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env

In [None]:
# Parallel environments: 8 copies of CartPole-v1 stepped together.
vec_env = make_vec_env("CartPole-v1", n_envs=8)

# Reference hyperparameters for CartPole-v1 (kept here for comparison):
#   n_envs: 8
#   n_timesteps: !!float 1e5
#   policy: 'MlpPolicy'
#   n_steps: 32
#   batch_size: 256
#   gae_lambda: 0.8
#   gamma: 0.98
#   n_epochs: 20
#   ent_coef: 0.0
#   learning_rate: lin_0.001
#   clip_range: lin_0.2
# Only gamma, gae_lambda and a constant learning rate of 0.001 are applied
# below; every other PPO setting is left at the library default.

# Create the agent.
# The hyperparameters are passed to the constructor instead of being assigned
# on the instance afterwards: PPO builds its rollout buffer (which uses gamma
# and gae_lambda), its learning-rate schedule and its optimizer while it is
# being constructed, so attributes set later do not reach those components.
model = PPO(
    "MlpPolicy",
    vec_env,
    learning_rate=0.001,
    gamma=0.98,
    gae_lambda=0.8,
    verbose=1,
)

# total_timesteps is an integer count of environment steps (1e5 as an int).
model.learn(total_timesteps=100_000)


In [None]:
# Save the trained agent under the name "ppo_cartpole"; the next cell reloads
# it from the same name with PPO.load.
model.save("ppo_cartpole")

In [None]:
# Reload the agent saved as "ppo_cartpole" and roll it out for a fixed number
# of steps in `vec_env` (the vectorized environment created in the training
# cell), rendering each step.
model = PPO.load("ppo_cartpole")
obs = vec_env.reset()

T = 100  # number of timesteps
try:
    for _ in range(T):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = vec_env.step(action)
        vec_env.render("human")
finally:
    # Release the environments even if predict/step/render raises part-way
    # through the rollout, so a failed run does not leave them open.
    vec_env.close()