# **Dependencies**

In [3]:
import os
import glob
import tqdm

import gymnasium as gym
from gymnasium.wrappers import RecordVideo
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.callbacks import (
    CheckpointCallback,
    EvalCallback,
    StopTrainingOnRewardThreshold,
    CallbackList
)
from pathlib import Path

from dotenv import load_dotenv

# **Load Environement**

In [36]:
environment_name = "BipedalWalker-v3"

train_env = SubprocVecEnv([
    lambda: gym.make(environment_name, 
        hardcore = True,
        render_mode=None)
    for _ in range(12)
])

test_env = gym.make(environment_name, 
        hardcore = True,
        render_mode = None)

# **Training**

In [37]:
save_path = Path(r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\saved_models")
checkpoint_path = Path(r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\checkpoints")

In [38]:
stop_callback = StopTrainingOnRewardThreshold(
    reward_threshold=500,
    verbose=1
)

eval_callback = EvalCallback(
    test_env,
    callback_on_new_best=stop_callback,
    eval_freq=5000,
    best_model_save_path=save_path,
    verbose=1
)

checkpoint_callback = CheckpointCallback(
    save_freq=2048,          # every 10k steps
    save_path=checkpoint_path,
    name_prefix="lunar_ppo",
    verbose=2
)

callback = CallbackList([checkpoint_callback, eval_callback])

In [6]:
model = PPO(
    "MlpPolicy", 
    train_env,
    verbose=1,
    
    # --- The 12-Core Optimizations ---
    n_steps=2048,       # Collect less data before learning (12 * 1024 = 12k steps)
    batch_size=128,     # Process larger chunks of data at once (faster on your laptop)
    n_epochs=4,         # Don't over-train on the same data (prevents overfitting)
    
    # --- The Standard Good Stuff ---
    learning_rate=3e-4,
    gamma=0.99,        # Increased to 0.999 (LunarLander needs long-term planning)
    gae_lambda=0.95,
    ent_coef=0.001, 
    vf_coef=0.5,
    max_grad_norm=0.5,
    policy_kwargs=dict(net_arch=[dict(pi=[256, 256], vf=[256, 256])])
)

Using cpu device




In [None]:
model.learn(total_timesteps=10000000, callback=callback , progress_bar= True)

In [None]:
model.save(save_path / "saved_model_hardcore.zip")

# **Loading**


In [15]:
import os
path = r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\ssaved_models"  # example
if os.path.exists(path):
    print("✅ File exists")
else:
    print(" File not found")

 File not found


In [17]:
model = PPO.load(r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\saved_models\saved_model_normal.zip" , env = test_env)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [41]:
model = PPO.load(r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\saved_models\saved_model_hardcore.zip" , env = train_env)

# **Testing**

In [None]:
episodes = 5

for episode in range(1, episodes + 1):
    obs, info = test_env.reset()
    terminated = False
    truncated = False
    score = 0

    while not (terminated or truncated):
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, info = test_env.step(action)
        score += reward

    print(f"Episode {episode} - Score: {score}")

In [43]:
test_env.close() 
train_env.close()

# **Record Video**

In [28]:
folder= r"C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos"

for f in glob.glob(os.path.join(folder, "*")):
    os.remove(f)

env = RecordVideo(test_env, video_folder=folder, episode_trigger=lambda x: True)

#Evaluation

episodes = 2

try:    
    for episode in range(1, episodes + 1):
        obs, info = env.reset()
        terminated = False
        truncated = False
        score = 0

        while not (terminated or truncated):
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, terminated, truncated, info = env.step(action)
            score += reward

        print(f"Episode {episode} - Score: {score}")

finally:
    env.close() 
    test_env.close()
    print(f"Videos saved to: {folder}")

  logger.warn(


MoviePy - Building video C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-0.mp4.
MoviePy - Writing video C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-0.mp4
Episode 1 - Score: 299.10605506195117
MoviePy - Building video C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-1.mp4.
MoviePy - Writing video C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-1.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos\rl-video-episode-1.mp4
Episode 2 - Score: 296.80043407889144
Videos saved to: C:\Users\KIIT\OneDrive\Desktop\VS CODE\Reinforcement\Bipedal\videos


# **Loading Model to hugging face**

In [None]:
# ---------------------------------------------------------
# 1. LOGIN TO HUGGING FACE (FIXES THE 400 BAD REQUEST)
# ---------------------------------------------------------
from huggingface_hub import login
from dotenv import load_dotenv

# TODO: PASTE YOUR WRITE TOKEN HERE
# (Get it from https://huggingface.co/settings/tokens)
load_dotenv()  # Load environment variables from .env file

HUG_TOKEN = os.getenv("HUG_TOKEN")
login(token=HUG_TOKEN)  # --- IGNORE ---

# ---------------------------------------------------------
# 2. IMPORTS
# ---------------------------------------------------------
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from huggingface_sb3 import package_to_hub
from pathlib import Path

# ---------------------------------------------------------
# 3. SETUP YOUR INFO
# ---------------------------------------------------------

# TODO: CHANGE THIS TO YOUR ACTUAL USERNAME
username = "Subcon"

# Model & Repo Names
model_name = "ppo-BipedalWalker-v3"
repo_name = "ppo-BipedalWalker-v3"
repo_id = f"{username}/{repo_name}"

# Environment Config
env_id = "BipedalWalker-v3"
model_architecture = "PPO"
commit_message = "Upload PPO BipedalWalker-v3 trained agent"

# ---------------------------------------------------------
# 4. LOAD THE MODEL (Safety Check)
# ---------------------------------------------------------
# If 'model' doesn't exist in memory (e.g. you restarted VS Code), load it now.
try:
    model
except NameError:
    print("Loading model from file...")
    # Make sure this points to your saved zip file!
    model_path = "ppo_lunar_v2_final.zip" 
    model = PPO.load(model_path)

# ---------------------------------------------------------
# 5. PREPARE EVALUATION ENV
# ---------------------------------------------------------
# We create a dummy environment so Hugging Face can record a video of your agent
eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# ---------------------------------------------------------
# 6. PUSH TO HUB
# ---------------------------------------------------------
print(f"Pushing model to {repo_id}...")

package_to_hub(
    model=model,                 # The model variable
    model_name=model_name,       # Name of the model file
    model_architecture=model_architecture, # "PPO"
    env_id=env_id,               # "LunarLander-v2"
    eval_env=eval_env,           # The eval env for video recording
    repo_id=repo_id,             # Your username/repo_name
    commit_message=commit_message
)

print("---------------------------------------------------------")
print(f"Done! Your model is live at: https://huggingface.co/{repo_id}")
print("---------------------------------------------------------")

Pushing model to Subcon/ppo-BipedalWalker-v3...
[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m




Saving video to C:\Users\KIIT\AppData\Local\Temp\tmphqngdd5v\-step-0-to-step-1000.mp4
MoviePy - Building video C:\Users\KIIT\AppData\Local\Temp\tmphqngdd5v\-step-0-to-step-1000.mp4.
MoviePy - Writing video C:\Users\KIIT\AppData\Local\Temp\tmphqngdd5v\-step-0-to-step-1000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready C:\Users\KIIT\AppData\Local\Temp\tmphqngdd5v\-step-0-to-step-1000.mp4
[38;5;4mℹ Pushing repo Subcon/ppo-BipedalWalker-v3 to the Hugging Face Hub[0m


policy.optimizer.pth:   0%|          | 0.00/1.18M [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/588k [00:00<?, ?B/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

Upload 4 LFS files:   0%|          | 0/4 [00:00<?, ?it/s]

ppo-BipedalWalker-v3.zip:   0%|          | 0.00/1.78M [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/Subcon/ppo-BipedalWalker-v3/tree/main/[0m
---------------------------------------------------------
Done! Your model is live at: https://huggingface.co/Subcon/ppo-BipedalWalker-v3
---------------------------------------------------------
