## 1. Install & imports

In [None]:
!pip install -q -r ../requirements.txt
import sys
sys.path.append("..")
import yaml
import numpy as np
import gym
from stable_baselines3 import PPO
from utils.data_loader import load_demonstrations
from envs.humanoid_demo_env import HumanoidDemoEnv

## 2. Load config + demo data

In [None]:
# Read the experiment configuration. The encoding is pinned explicitly so the
# YAML file is decoded the same way regardless of the platform's locale
# default (open() otherwise falls back to the locale encoding).
with open("../config.yaml", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)

# Load the demonstrations from the path stored under cfg["csv_path"]; the
# loader's result is unpacked into a (states, actions) pair.
demo_states, demo_actions = load_demonstrations(cfg["csv_path"])

## 3. Create environment & RL model

In [None]:
# Build the demonstration-backed environment from the model XML path, the
# loaded demonstration data, and the full config mapping.
env = HumanoidDemoEnv(
    xml_path=cfg["xml_path"],
    demo_states=demo_states,
    demo_actions=demo_actions,
    cfg=cfg,
)

# Pull the PPO hyperparameters straight from the config; each key name matches
# the PPO keyword argument it feeds.
ppo_hyperparams = {
    name: cfg[name]
    for name in ("learning_rate", "n_steps", "batch_size", "gamma")
}

# PPO agent with an MLP policy; training metrics are logged for TensorBoard
# under ./tensorboard/.
model = PPO(
    policy="MlpPolicy",
    env=env,
    verbose=1,
    tensorboard_log="./tensorboard/",
    **ppo_hyperparams,
)

## 4. Train the model

In [None]:
# Train for the number of environment steps requested in the config, then
# persist the trained agent under the name "ppo_humanoid".
timestep_budget = cfg["total_timesteps"]
model.learn(total_timesteps=timestep_budget)
model.save("ppo_humanoid")

## 5. Evaluate & render

In [None]:
# Roll out the trained policy for 1000 rendered steps.
# NOTE(review): use_imitation is a flag on HumanoidDemoEnv; presumably it
# switches off the demonstration/imitation behaviour for evaluation -- confirm
# against the environment implementation.
env.use_imitation = False
obs = env.reset()
for _step in range(1000):
    # NOTE(review): predict() samples stochastically by default; consider
    # deterministic=True if a repeatable evaluation rollout is wanted.
    action, _state = model.predict(obs)
    obs, _reward, done, _info = env.step(action)
    env.render()
    # Stepping an environment after its episode has ended is undefined in the
    # Gym API, so begin a fresh episode as soon as the current one finishes.
    if done:
        obs = env.reset()