# Bipedal walker
https://gymnasium.farama.org/environments/box2d/bipedal_walker/

## Setup

In [22]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback
from gymnasium import Env
import numpy as np

# --- Run configuration ---------------------------------------------------
# Run identifier; the log and model locations below are derived from it.
name = "ppo_walker_v1"

# Gymnasium environment id, and how many copies go into the vectorized env.
env_id = 'BipedalWalker-v3'
n_envs = 4

# Policy identifier and compute device handed to the PPO constructor.
policy = "MlpPolicy"
device = "cpu"

# Output locations for TensorBoard logs and the saved model.
tensorboard_log = f"./{name}/t_logs/"
path = f"./{name}/model/"

def make_env(render_mode:str='rgb_array'):
    """Build a single non-hardcore `env_id` environment.

    Args:
        render_mode: Forwarded unchanged to ``gym.make``.

    Returns:
        The environment object returned by ``gym.make``.
    """
    return gym.make(env_id, render_mode=render_mode, hardcore=False)

# Vectorized training environment: `n_envs` copies, each built via make_env.
env = make_vec_env(make_env, n_envs=n_envs)


class SaveOnStep(BaseCallback):
    """Callback that periodically checkpoints the model being trained.

    Every ``steps`` invocations of the callback, the model is written to
    ``path``. The same path is reused on every save, so each checkpoint
    replaces the previous one.

    Args:
        steps: Save interval, counted in callback invocations (``n_calls``).
            Must be positive.
        path: Destination handed to ``model.save``.
        verbose: When greater than 0, print a line on every save.

    Raises:
        ValueError: If ``steps`` is not positive.
    """

    def __init__(self, steps: int, path: str, verbose: int = 0):
        super().__init__(verbose)
        # Fail at construction instead of mid-training: a zero interval would
        # raise ZeroDivisionError on the first modulo in _on_step.
        if steps <= 0:
            raise ValueError(f"steps must be positive, got {steps!r}")
        self.steps = steps
        self.save_path = path

    def _on_step(self) -> bool:
        """Save the model on every ``steps``-th call.

        Returns:
            Always ``True``; this callback never asks training to stop.
        """
        if self.n_calls % self.steps == 0:
            if self.verbose > 0:
                print(f"Saving model at step {self.n_calls} to {self.save_path}")
            # The filename does not vary, so this overwrites the last checkpoint.
            self.model.save(self.save_path)
        return True
    
# Checkpoint every 25 000 callback invocations. An int literal is used because
# SaveOnStep declares `steps: int`; the value is the same as 2.5e4.
callbacks = [SaveOnStep(25_000, path)]


## Create Model

In [None]:
# /!\ Caution: running this cell rebinds `model` to a newly constructed PPO
# instance (presumably the warning is there so an already trained model is
# not replaced by accident -- confirm intent).
model = PPO(
    policy=policy,
    env=env,
    device=device,
    tensorboard_log=tensorboard_log,
    verbose=0,
)
# Left disabled by the author; uncommenting writes this model to `path`.
#model.save(path)



## Load model

In [None]:
# Restore the saved model from `path` and attach the training environment.
# `device` is passed explicitly so a loaded model uses the same configured
# device as a newly created one; otherwise PPO.load picks its own default.
model = PPO.load(path, env=env, device=device)

## Learn

In [23]:
# Timestep budget for this training call. Written as an int because
# `learn` declares `total_timesteps` as an integer (1e6 is a float).
total_timesteps = 1_000_000

model.learn(
    total_timesteps=total_timesteps,
    callback=callbacks,
    # Keep the model's existing timestep counter instead of restarting it,
    # so repeated runs of this cell continue from where the last one ended.
    reset_num_timesteps=False,
    progress_bar=True,
)
# Final save once training returns, in addition to the periodic callback saves.
model.save(path)

Output()

## Display

In [None]:
# Separate single environment with on-screen rendering for watching the agent.
display_env = make_env(render_mode='human')

try:
    # Play three full episodes with the current model.
    for _ in range(3):
        obs, _ = display_env.reset()
        done = False
        while not done:
            action, _ = model.predict(obs)
            obs, _, terminated, truncated, _ = display_env.step(action)
            # An episode ends on either flag returned by step().
            done = terminated or truncated
finally:
    # Always release the environment (and its render window), even if an
    # episode is interrupted or raises.
    display_env.close()

    