<a href="https://colab.research.google.com/github/Andyroo888/AISF-RL-Task/blob/main/acm_ai_proj_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# 1. Install the system dependency SWIG first
!apt-get install -y swig

# 2. Then install the Python libraries
!pip install gymnasium[box2d] stable-baselines3 shimmy moviepy

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  swig4.0
Suggested packages:
  swig-doc swig-examples swig4.0-examples swig4.0-doc
The following NEW packages will be installed:
  swig swig4.0
0 upgraded, 2 newly installed, 0 to remove and 1 not upgraded.
Need to get 1,116 kB of archives.
After this operation, 5,542 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig4.0 amd64 4.0.2-1ubuntu1 [1,110 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 swig all 4.0.2-1ubuntu1 [5,632 B]
Fetched 1,116 kB in 1s (1,449 kB/s)
Selecting previously unselected package swig4.0.
(Reading database ... 117528 files and directories currently installed.)
Preparing to unpack .../swig4.0_4.0.2-1ubuntu1_amd64.deb ...
Unpacking swig4.0 (4.0.2-1ubuntu1) ...
Selecting previously unselected package swig.
Preparing to unpack .../swig_4.0.2-1ubun

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from IPython.display import Video, display
import imageio
import os

# --- GLOBAL CONFIG ---
# STEPS is the training budget that experiment cells pass as `timesteps=STEPS`;
# LOG_DIR / VIDEO_DIR are the root folders run_experiment writes its artifacts to.
STEPS = 300_000
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"

# Create both output folders up front (no error if they already exist).
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)

def run_experiment(exp_name, net_arch, ent_coef, timesteps=300000, seed=None):
    """
    Train PPO on BipedalWalker-v3 with VecNormalize, then record one evaluation episode.

    The trained model and the normalization statistics are written to
    ``LOG_DIR/<exp_name>/`` and the evaluation video to ``VIDEO_DIR/<exp_name>.mp4``.
    Two calls with the same ``exp_name`` write to the same paths.

    Args:
        exp_name: Label of the run; names the log sub-directory and the video file.
        net_arch: Hidden-layer sizes for the policy, e.g. ``[256, 256]``.
        ent_coef: PPO entropy coefficient.
        timesteps: Number of environment steps to train for.
        seed: Optional seed forwarded to PPO so a run can be reproduced.
            ``None`` (the default) leaves the run unseeded, as before.

    Returns:
        Path of the run's log directory.
    """
    print(f"\n{'='*40}")
    print(f"🚀 STARTING EXPERIMENT: {exp_name}")
    print(f"🧠 Brain: {net_arch} | 🎲 Entropy: {ent_coef}")
    print(f"{'='*40}")

    # 1. Setup Training Environment
    run_log_dir = os.path.join(LOG_DIR, exp_name)
    os.makedirs(run_log_dir, exist_ok=True)
    stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # The factory lambda closes over a dedicated name that is never rebound, so it
    # keeps returning the monitored env even after `env` refers to the wrappers.
    monitored_env = Monitor(gym.make("BipedalWalker-v3", render_mode="rgb_array"), run_log_dir)
    env = DummyVecEnv([lambda: monitored_env])

    # --- THE FIX: ADD NORMALIZATION BACK ---
    env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    try:
        # 2. Setup Model
        # For the small [64, 64] brain we pass no policy_kwargs and rely on the
        # library default instead of an explicit net_arch.
        policy_kwargs = None if net_arch == [64, 64] else dict(net_arch=net_arch)

        model = PPO(
            "MlpPolicy",
            env,
            verbose=0,
            tensorboard_log=LOG_DIR,
            policy_kwargs=policy_kwargs,
            ent_coef=ent_coef,
            learning_rate=0.0003,
            n_steps=2048,
            batch_size=64,
            n_epochs=10,
            gamma=0.99,
            gae_lambda=0.95,
            clip_range=0.2,
            seed=seed,
        )

        # 3. Train
        print(f"⏳ Training for {timesteps} steps...")
        model.learn(total_timesteps=timesteps)

        # 4. Save Model AND Stats (the stats are required to evaluate the model later)
        model.save(os.path.join(run_log_dir, "final_model"))
        env.save(stats_path)
    finally:
        # Release the environment even when training or saving fails.
        env.close()

    # --- 5. EVALUATION ---
    print(f"🎥 Recording video for {exp_name}...")

    # Fresh environment for evaluation, normalized with the EXACT training statistics.
    raw_eval_env = gym.make("BipedalWalker-v3", render_mode="rgb_array")
    eval_env = VecNormalize.load(stats_path, DummyVecEnv([lambda: raw_eval_env]))
    eval_env.training = False     # Don't update stats during test
    eval_env.norm_reward = False  # See real raw score

    images = []
    total_reward = 0.0
    try:
        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            # The vectorized env returns one flag per sub-env; there is exactly one here.
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    # Save Video
    video_path = os.path.join(VIDEO_DIR, f"{exp_name}.mp4")
    imageio.mimsave(video_path, images, fps=30)

    print(f"✅ Finished: {exp_name}")
    print(f"🏆 Final Score: {total_reward:.2f}")

    # Inline playback is best-effort: report the problem instead of hiding it,
    # but never fail the experiment because of it.
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        print(f"⚠️ Could not display {video_path}: {exc}")

    return run_log_dir

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.
  return datetime.utcnow().replace(tzinfo=utc)


In [None]:
# 1a. Initial ideal hypothesis: big brain (256x256) + high entropy (0.1)
run_experiment(exp_name="2_high_entropy_instability", net_arch=[256, 256], ent_coef=0.1, timesteps=300_000)


🚀 STARTING EXPERIMENT: 2_high_entropy_instability
🧠 Brain: [256, 256] | 🎲 Entropy: 0.1
⏳ Training for 300000 steps...
🎥 Recording video for 2_high_entropy_instability...




✅ Finished: 2_high_entropy_instability
🏆 Final Score: -107.24


'./logs/2_high_entropy_instability'

In [None]:
# 1b. Ablation #1 (lower entropy): big brain (256x256) + low entropy (0.0001)
run_experiment(exp_name="3_golden_success", net_arch=[256, 256], ent_coef=0.0001, timesteps=STEPS)


🚀 STARTING EXPERIMENT: 3_golden_success
🧠 Brain: [256, 256] | 🎲 Entropy: 0.0001


  from pkg_resources import resource_stream, resource_exists
  return datetime.utcnow().replace(tzinfo=utc)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


⏳ Training for 300000 steps...


  return datetime.utcnow().replace(tzinfo=utc)


🎥 Recording video for 3_golden_success...




✅ Finished: 3_golden_success
🏆 Final Score: 254.36


'./logs/3_golden_success'

In [None]:
# 2a. (Reduce brain): Small brain + High entropy
# This run gets its own exp_name: cell 2b uses "1_baseline_small_brain", and a shared
# name would make 2b overwrite this run's model, stats, monitor log and video.
run_experiment(
    exp_name="2a_small_brain_high_entropy",
    net_arch=[64, 64],
    ent_coef=0.1,
    timesteps=STEPS
)


🚀 STARTING EXPERIMENT: 1_baseline_small_brain
🧠 Brain: [64, 64] | 🎲 Entropy: 0.1
⏳ Training for 300000 steps...
🎥 Recording video for 1_baseline_small_brain...




✅ Finished: 1_baseline_small_brain
🏆 Final Score: 223.90


'./logs/1_baseline_small_brain'

In [None]:
# 2b. Additional ablation: small brain (64x64) + low entropy (0.0001)
run_experiment(exp_name="1_baseline_small_brain", net_arch=[64, 64], ent_coef=0.0001, timesteps=300_000)


🚀 STARTING EXPERIMENT: 1_baseline_small_brain
🧠 Brain: [64, 64] | 🎲 Entropy: 0.0001
⏳ Training for 300000 steps...
🎥 Recording video for 1_baseline_small_brain...




✅ Finished: 1_baseline_small_brain
🏆 Final Score: 275.99


'./logs/1_baseline_small_brain'

In [None]:
# 2bii. New ideal (2b) with more timesteps
# Uses its own exp_name: "3_golden_success" is already taken by cell 1b (a [256, 256]
# network), and reusing it would overwrite that run's model, stats, log and video.
run_experiment(
    exp_name="4_small_brain_low_entropy_1M",
    net_arch=[64, 64],
    ent_coef=0.0001,
    timesteps=1_000_000
)


🚀 STARTING EXPERIMENT: 3_golden_success
🧠 Brain: [64, 64] | 🎲 Entropy: 0.0001


  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
  return datetime.utcnow().replace(tzinfo=utc)


⏳ Training for 1000000 steps...


  return datetime.utcnow().replace(tzinfo=utc)


🎥 Recording video for 3_golden_success...




✅ Finished: 3_golden_success
🏆 Final Score: 283.23


'./logs/3_golden_success'

In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Experiment label ("Transfer" because knowledge is transferred from the normal
# environment). run_transfer_learning uses it for the run's log directory, the
# checkpoint file prefix and the video file name.
EXP_NAME = "11_transfer_learning"

# Training budget handed to model.learn by run_transfer_learning.
# NOTE: this rebinds the notebook-wide STEPS (300000 in the first config cell),
# which cells 1b and 2a pass as `timesteps=STEPS`.
STEPS = 1_000_000

# 1. ⚠️ POINT TO YOUR ACTUAL SAVED FILES
# Model and normalization statistics written by run_experiment for the
# "1_baseline_small_brain" run.
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"

# Output locations; models_dir receives the periodic checkpoints.
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

def run_transfer_learning():
    """
    Fine-tune the saved normal-terrain PPO model on hardcore BipedalWalker-v3.

    Reads the notebook-level settings EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Steps: build the hardcore environment, restore the VecNormalize statistics
    (falling back to fresh statistics when the file is missing), load the
    pretrained model with fine-tuning hyperparameters, train for STEPS steps with
    periodic checkpoints, save the model and statistics under LOG_DIR/EXP_NAME,
    then record one deterministic evaluation episode to VIDEO_DIR/EXP_NAME.mp4.

    Returns:
        None. Returns early, after printing an error, when the pretrained model
        file is missing or PPO.load raises ValueError; the training environment
        is closed on every exit path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The factory lambda closes over a dedicated name that is never rebound, so it
    # keeps returning the monitored env even after `env` refers to the wrappers.
    monitored_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: monitored_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    try:
        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ LOWER LEARNING RATE: We want to "fine-tune", not "re-learn"
                learning_rate=0.0001,
                # ⬇️ HIGHER GAMMA: Hardcore requires looking further ahead
                gamma=0.999,
                ent_coef=0.001
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes).
        # The normalization statistics are saved with each checkpoint, because the
        # model weights alone cannot be evaluated or resumed without them.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Runs on success, on the early returns above and on errors alike.
        env.close()

    # --- EVALUATION ---
    print(f"🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: raw_eval_env]))
    eval_env.training = False
    eval_env.norm_reward = False

    images = []
    total_reward = 0.0
    try:
        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            # The vectorized env returns one flag per sub-env; there is exactly one here.
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")

    # Inline playback is best-effort: report the problem instead of hiding it.
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        print(f"⚠️ Could not display {video_path}: {exc}")

# In a notebook cell __name__ is "__main__", so this runs whenever the cell is executed.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...


  from pkg_resources import resource_stream, resource_exists
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)
Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages
  declare_namespace(pkg)


👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


  return datetime.utcnow().replace(tzinfo=utc)


In [None]:
# 2b. Additional ablation: small brain (64x64) + low entropy (0.0001)
run_experiment(exp_name="1_baseline_small_brain", net_arch=[64, 64], ent_coef=0.0001, timesteps=300_000)


🚀 STARTING EXPERIMENT: 1_baseline_small_brain
🧠 Brain: [64, 64] | 🎲 Entropy: 0.0001




⏳ Training for 300000 steps...


  return datetime.utcnow().replace(tzinfo=utc)


🎥 Recording video for 1_baseline_small_brain...




✅ Finished: 1_baseline_small_brain
🏆 Final Score: 264.41


'./logs/1_baseline_small_brain'

In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Experiment label ("Transfer" because knowledge is transferred from the normal
# environment). run_transfer_learning uses it for the run's log directory, the
# checkpoint file prefix and the video file name.
EXP_NAME = "11_transfer_learning"

# Training budget handed to model.learn by run_transfer_learning
# (rebinds the notebook-wide STEPS that earlier experiment cells also read).
STEPS = 300_000

# 1. ⚠️ POINT TO YOUR ACTUAL SAVED FILES
# Model and normalization statistics written by run_experiment for the
# "1_baseline_small_brain" run.
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"

# Output locations; models_dir receives the periodic checkpoints.
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

def run_transfer_learning():
    """
    Fine-tune the saved normal-terrain PPO model on hardcore BipedalWalker-v3.

    Reads the notebook-level settings EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Steps: build the hardcore environment, restore the VecNormalize statistics
    (falling back to fresh statistics when the file is missing), load the
    pretrained model with fine-tuning hyperparameters, train for STEPS steps with
    periodic checkpoints, save the model and statistics under LOG_DIR/EXP_NAME,
    then record one deterministic evaluation episode to VIDEO_DIR/EXP_NAME.mp4.

    Returns:
        None. Returns early, after printing an error, when the pretrained model
        file is missing or PPO.load raises ValueError; the training environment
        is closed on every exit path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The factory lambda closes over a dedicated name that is never rebound, so it
    # keeps returning the monitored env even after `env` refers to the wrappers.
    monitored_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: monitored_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    try:
        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes).
        # The normalization statistics are saved with each checkpoint, because the
        # model weights alone cannot be evaluated or resumed without them.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Runs on success, on the early returns above and on errors alike.
        env.close()

    # --- EVALUATION ---
    print(f"🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: raw_eval_env]))
    eval_env.training = False
    eval_env.norm_reward = False

    images = []
    total_reward = 0.0
    try:
        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            # The vectorized env returns one flag per sub-env; there is exactly one here.
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")

    # Inline playback is best-effort: report the problem instead of hiding it.
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        print(f"⚠️ Could not display {video_path}: {exc}")

# In a notebook cell __name__ is "__main__", so this runs whenever the cell is executed.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Experiment label ("Transfer" because knowledge is transferred from the normal
# environment). run_transfer_learning uses it for the run's log directory, the
# checkpoint file prefix and the video file name.
EXP_NAME = "11_transfer_learning"

# Training budget handed to model.learn by run_transfer_learning
# (rebinds the notebook-wide STEPS that earlier experiment cells also read).
STEPS = 300_000

# 1. ⚠️ POINT TO YOUR ACTUAL SAVED FILES
# Model and normalization statistics written by run_experiment for the
# "1_baseline_small_brain" run.
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"

# Output locations; models_dir receives the periodic checkpoints.
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

def run_transfer_learning():
    """
    Fine-tune the saved normal-terrain PPO model on hardcore BipedalWalker-v3.

    Reads the notebook-level settings EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Steps: build the hardcore environment, restore the VecNormalize statistics
    (falling back to fresh statistics when the file is missing), load the
    pretrained model with fine-tuning hyperparameters, train for STEPS steps with
    periodic checkpoints, save the model and statistics under LOG_DIR/EXP_NAME,
    then record one deterministic evaluation episode to VIDEO_DIR/EXP_NAME.mp4.

    Returns:
        None. Returns early, after printing an error, when the pretrained model
        file is missing or PPO.load raises ValueError; the training environment
        is closed on every exit path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The factory lambda closes over a dedicated name that is never rebound, so it
    # keeps returning the monitored env even after `env` refers to the wrappers.
    monitored_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: monitored_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    try:
        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes).
        # The normalization statistics are saved with each checkpoint, because the
        # model weights alone cannot be evaluated or resumed without them.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Runs on success, on the early returns above and on errors alike.
        env.close()

    # --- EVALUATION ---
    print(f"🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: raw_eval_env]))
    eval_env.training = False
    eval_env.norm_reward = False

    images = []
    total_reward = 0.0
    try:
        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            # The vectorized env returns one flag per sub-env; there is exactly one here.
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")

    # Inline playback is best-effort: report the problem instead of hiding it.
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        print(f"⚠️ Could not display {video_path}: {exc}")

# In a notebook cell __name__ is "__main__", so this runs whenever the cell is executed.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Experiment label ("Transfer" because knowledge is transferred from the normal
# environment). run_transfer_learning uses it for the run's log directory, the
# checkpoint file prefix and the video file name.
EXP_NAME = "11_transfer_learning"

# Training budget handed to model.learn by run_transfer_learning
# (rebinds the notebook-wide STEPS that earlier experiment cells also read).
STEPS = 300_000

# 1. ⚠️ POINT TO YOUR ACTUAL SAVED FILES
# Model and normalization statistics written by run_experiment for the
# "1_baseline_small_brain" run.
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"

# Output locations; models_dir receives the periodic checkpoints.
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

def run_transfer_learning():
    """
    Fine-tune the saved normal-terrain PPO model on hardcore BipedalWalker-v3.

    Reads the notebook-level settings EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Steps: build the hardcore environment, restore the VecNormalize statistics
    (falling back to fresh statistics when the file is missing), load the
    pretrained model with fine-tuning hyperparameters, train for STEPS steps with
    periodic checkpoints, save the model and statistics under LOG_DIR/EXP_NAME,
    then record one deterministic evaluation episode to VIDEO_DIR/EXP_NAME.mp4.

    Returns:
        None. Returns early, after printing an error, when the pretrained model
        file is missing or PPO.load raises ValueError; the training environment
        is closed on every exit path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The factory lambda closes over a dedicated name that is never rebound, so it
    # keeps returning the monitored env even after `env` refers to the wrappers.
    monitored_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: monitored_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    try:
        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes).
        # The normalization statistics are saved with each checkpoint, because the
        # model weights alone cannot be evaluated or resumed without them.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Runs on success, on the early returns above and on errors alike.
        env.close()

    # --- EVALUATION ---
    print(f"🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: raw_eval_env]))
    eval_env.training = False
    eval_env.norm_reward = False

    images = []
    total_reward = 0.0
    try:
        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            # The vectorized env returns one flag per sub-env; there is exactly one here.
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")

    # Inline playback is best-effort: report the problem instead of hiding it.
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        print(f"⚠️ Could not display {video_path}: {exc}")

# In a notebook cell __name__ is "__main__", so this runs whenever the cell is executed.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Experiment label ("Transfer" because knowledge is transferred from the normal
# environment). run_transfer_learning uses it for the run's log directory.
EXP_NAME = "11_transfer_learning"

# Training budget for this fine-tuning run
# (rebinds the notebook-wide STEPS that earlier experiment cells also read).
STEPS = 300_000

# 1. ⚠️ POINT TO YOUR ACTUAL SAVED FILES
# Model and normalization statistics written by run_experiment for the
# "1_baseline_small_brain" run.
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"

# Output locations, created up front (no error if they already exist).
LOG_DIR, VIDEO_DIR = "./logs/", "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
os.makedirs(VIDEO_DIR, exist_ok=True)
os.makedirs(LOG_DIR, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
⏳ Transfer training for 300000 steps...
🎥 Recording result...




🏆 Final Score: 90.06


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()



🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
⏳ Transfer training for 300000 steps...
🎥 Recording result...




🏆 Final Score: 137.26


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Output locations shared by every artifact this cell writes.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")

# Run name: "Transfer" because knowledge from an earlier run is carried over.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the pretrained run - point these at your actual saved files.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Total number of environment steps for the fine-tuning run.
STEPS = 300_000

# Make sure every output directory exists before anything is written.
for _out_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

def run_transfer_learning():
    """
    Fine-tune the pretrained PPO walker on hardcore BipedalWalker-v3 and record one episode.

    Steps:
      1. Build a monitored, vectorised hardcore environment.
      2. Wrap it with the pretrained VecNormalize statistics (fresh statistics
         if the file is missing) and keep them updating during training.
      3. Load the pretrained PPO model with overridden hyper-parameters.
      4. Train for STEPS timesteps, checkpointing every 100k steps.
      5. Save the final model and normalization statistics in LOG_DIR/EXP_NAME.
      6. Roll out one deterministic episode, write it to VIDEO_DIR/EXP_NAME.mp4
         and print the un-normalized episode return.

    Reads the module-level constants EXP_NAME, PRETRAINED_MODEL_PATH,
    PRETRAINED_STATS_PATH, LOG_DIR, VIDEO_DIR, models_dir and STEPS.

    Returns:
        None. Returns early (after printing an error and closing the
        environment) when the pretrained model cannot be loaded.
    """
    print(f"\n{'='*40}")
    print("🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # The raw env gets its own name so the factory lambda never closes over a
    # variable that is later rebound to the vectorised wrapper.
    train_base_env = Monitor(
        gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array"),
        run_log_dir,
    )
    env = DummyVecEnv([lambda: train_base_env])

    # 3. LOAD THE GLASSES (Normalization Stats)
    # We load the stats from the old run so the robot sees the world correctly immediately
    print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
    try:
        env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
        # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
        env.training = True
        env.norm_reward = True
    except FileNotFoundError:
        print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
        env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

    # 4. LOAD THE VETERAN BRAIN
    print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

    try:
        # The keyword arguments replace the hyper-parameters stored in the checkpoint.
        model = PPO.load(
            PRETRAINED_MODEL_PATH,
            env=env,
            # ⬇️ Keep low to preserve walking skill
            learning_rate=0.0001,
            # ⬇️ Good for long-term survival
            gamma=0.999,
            # ⬇️ INCREASED: Force it to try new things (stop freezing!)
            ent_coef=0.003,
            # ⬇️ INCREASED: Allow bigger mental adjustments
            clip_range=0.3
        )
    except FileNotFoundError:
        print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
        env.close()  # release the simulator on the early exit
        return
    except ValueError as e:
        print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
        env.close()  # release the simulator on the early exit
        return

    # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
    # The policy only makes sense together with its normalization statistics,
    # so each checkpoint also stores the current VecNormalize state.
    checkpoint_callback = CheckpointCallback(
        save_freq=100_000,
        save_path=models_dir,
        name_prefix=f"{EXP_NAME}_ckpt",
        save_vecnormalize=True,
    )

    print(f"⏳ Transfer training for {STEPS} steps...")
    try:
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        # Close the training env even if training is interrupted or fails.
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    eval_base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")

    # Load the NEW stats we just learned in Hardcore
    eval_env = VecNormalize.load(final_stats_path, DummyVecEnv([lambda: eval_base_env]))
    eval_env.training = False      # freeze the statistics during evaluation
    eval_env.norm_reward = False   # report the raw environment reward

    images = []
    total_reward = 0
    try:
        obs = eval_env.reset()
        done = False
        while not done:
            images.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            obs, rewards, dones, _ = eval_env.step(action)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            total_reward += rewards[0]
            done = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    # NOTE(review): fps is hard-coded to 30 - confirm it matches the env's render rate.
    imageio.mimsave(video_path, images, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as err:
        # Inline playback is best-effort, but report the failure instead of hiding it.
        print(f"⚠️ Could not display the video inline ({err}); it is saved at {video_path}")

# Entry point for this cell: launch the transfer-learning run defined above.
# In a notebook kernel __name__ is "__main__", so this fires on every execution of the cell.
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Label for this run; it names the per-run log folder, the checkpoints
# and the output video.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the earlier baseline run that this experiment starts from.
#    Both paths must point at files that really exist on disk.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Number of environment timesteps to train for.
STEPS = 300_000

# Output locations, created up front so later writes cannot fail on a
# missing directory.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
for _output_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_output_dir, exist_ok=True)

def run_transfer_learning():
    """Fine-tune a pretrained PPO walker on BipedalWalker-v3 Hardcore and record it.

    Pipeline:
      1. Build a monitored, vectorised Hardcore environment.
      2. Wrap it in ``VecNormalize``, re-using the statistics stored at
         ``PRETRAINED_STATS_PATH`` when that file exists, otherwise starting
         from fresh statistics.
      3. Load the policy stored at ``PRETRAINED_MODEL_PATH`` (overriding a few
         hyper-parameters) and train it for ``STEPS`` timesteps, writing a
         checkpoint every 100k steps.
      4. Save the final model and normalisation statistics under
         ``LOG_DIR/EXP_NAME``.
      5. Roll out one deterministic episode, write it to
         ``VIDEO_DIR/EXP_NAME.mp4`` and print the un-normalised episode return.

    Returns:
        None. If the pretrained model file is missing, or loading it raises
        ``ValueError``, an error is printed and the function returns early
        without training. The training environment is closed on every exit
        path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # Distinct names are used so the factory lambda never closes over a
    # variable that is rebound afterwards.
    base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    monitored_env = Monitor(base_env, run_log_dir)
    env = DummyVecEnv([lambda: monitored_env])

    # Everything that uses the training env lives inside try/finally so the
    # env is released on early returns and on unexpected errors as well.
    try:
        # 3. LOAD THE GLASSES (Normalization Stats)
        # We load the stats from the old run so the robot sees the world correctly immediately
        print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
        try:
            env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
            # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
            env.training = True
            env.norm_reward = True
        except FileNotFoundError:
            print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
            env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
        # The policy is trained on normalised observations, so each checkpoint
        # also stores the VecNormalize statistics; weights alone could not be
        # resumed correctly after a crash.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    eval_env = DummyVecEnv([lambda: raw_eval_env])

    frames = []
    total_reward = 0.0
    try:
        # Load the NEW stats we just learned in Hardcore
        eval_env = VecNormalize.load(final_stats_path, eval_env)
        # Freeze the running statistics and report raw (un-normalised) rewards.
        eval_env.training = False
        eval_env.norm_reward = False

        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            frames.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, frames, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        # Inline display is best-effort; the video file has already been written.
        print(f"⚠️ Could not display video inline ({exc}). Saved at: {video_path}")

# Entry point for this cell: a notebook kernel runs cells with
# __name__ == "__main__", so executing the cell starts the experiment
# immediately (the captured output below is from that call).
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Label for this run; it names the per-run log folder, the checkpoints
# and the output video.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the earlier baseline run that this experiment starts from.
#    Both paths must point at files that really exist on disk.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Number of environment timesteps to train for.
STEPS = 300_000

# Output locations, created up front so later writes cannot fail on a
# missing directory.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
for _output_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_output_dir, exist_ok=True)

def run_transfer_learning():
    """Fine-tune a pretrained PPO walker on BipedalWalker-v3 Hardcore and record it.

    Pipeline:
      1. Build a monitored, vectorised Hardcore environment.
      2. Wrap it in ``VecNormalize``, re-using the statistics stored at
         ``PRETRAINED_STATS_PATH`` when that file exists, otherwise starting
         from fresh statistics.
      3. Load the policy stored at ``PRETRAINED_MODEL_PATH`` (overriding a few
         hyper-parameters) and train it for ``STEPS`` timesteps, writing a
         checkpoint every 100k steps.
      4. Save the final model and normalisation statistics under
         ``LOG_DIR/EXP_NAME``.
      5. Roll out one deterministic episode, write it to
         ``VIDEO_DIR/EXP_NAME.mp4`` and print the un-normalised episode return.

    Returns:
        None. If the pretrained model file is missing, or loading it raises
        ``ValueError``, an error is printed and the function returns early
        without training. The training environment is closed on every exit
        path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # Distinct names are used so the factory lambda never closes over a
    # variable that is rebound afterwards.
    base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    monitored_env = Monitor(base_env, run_log_dir)
    env = DummyVecEnv([lambda: monitored_env])

    # Everything that uses the training env lives inside try/finally so the
    # env is released on early returns and on unexpected errors as well.
    try:
        # 3. LOAD THE GLASSES (Normalization Stats)
        # We load the stats from the old run so the robot sees the world correctly immediately
        print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
        try:
            env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
            # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
            env.training = True
            env.norm_reward = True
        except FileNotFoundError:
            print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
            env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
        # The policy is trained on normalised observations, so each checkpoint
        # also stores the VecNormalize statistics; weights alone could not be
        # resumed correctly after a crash.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    eval_env = DummyVecEnv([lambda: raw_eval_env])

    frames = []
    total_reward = 0.0
    try:
        # Load the NEW stats we just learned in Hardcore
        eval_env = VecNormalize.load(final_stats_path, eval_env)
        # Freeze the running statistics and report raw (un-normalised) rewards.
        eval_env.training = False
        eval_env.norm_reward = False

        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            frames.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, frames, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        # Inline display is best-effort; the video file has already been written.
        print(f"⚠️ Could not display video inline ({exc}). Saved at: {video_path}")

# Entry point for this cell: a notebook kernel runs cells with
# __name__ == "__main__", so executing the cell starts the experiment
# immediately (the captured output below is from that call).
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip


In [None]:
#HARDCORE- 5

import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from stable_baselines3.common.callbacks import CheckpointCallback
from IPython.display import Video, display
import imageio
import numpy as np
import os

# --- CONFIG ---
# Label for this run; it names the per-run log folder, the checkpoints
# and the output video.
EXP_NAME = "11_transfer_learning"

# 1. ⚠️ Artifacts of the earlier baseline run that this experiment starts from.
#    Both paths must point at files that really exist on disk.
PRETRAINED_MODEL_PATH = "./logs/1_baseline_small_brain/final_model.zip"
PRETRAINED_STATS_PATH = "./logs/1_baseline_small_brain/vec_normalize.pkl"

# Number of environment timesteps to train for.
STEPS = 300_000

# Output locations, created up front so later writes cannot fail on a
# missing directory.
LOG_DIR = "./logs/"
VIDEO_DIR = "./videos/"
models_dir = os.path.join(LOG_DIR, "models")
for _output_dir in (LOG_DIR, VIDEO_DIR, models_dir):
    os.makedirs(_output_dir, exist_ok=True)

def run_transfer_learning():
    """Fine-tune a pretrained PPO walker on BipedalWalker-v3 Hardcore and record it.

    Pipeline:
      1. Build a monitored, vectorised Hardcore environment.
      2. Wrap it in ``VecNormalize``, re-using the statistics stored at
         ``PRETRAINED_STATS_PATH`` when that file exists, otherwise starting
         from fresh statistics.
      3. Load the policy stored at ``PRETRAINED_MODEL_PATH`` (overriding a few
         hyper-parameters) and train it for ``STEPS`` timesteps, writing a
         checkpoint every 100k steps.
      4. Save the final model and normalisation statistics under
         ``LOG_DIR/EXP_NAME``.
      5. Roll out one deterministic episode, write it to
         ``VIDEO_DIR/EXP_NAME.mp4`` and print the un-normalised episode return.

    Returns:
        None. If the pretrained model file is missing, or loading it raises
        ``ValueError``, an error is printed and the function returns early
        without training. The training environment is closed on every exit
        path.
    """
    print(f"\n{'='*40}")
    print(f"🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore")
    print(f"   Loading '{PRETRAINED_MODEL_PATH}'...")
    print(f"{'='*40}")

    run_log_dir = os.path.join(LOG_DIR, EXP_NAME)
    os.makedirs(run_log_dir, exist_ok=True)
    final_stats_path = os.path.join(run_log_dir, "vec_normalize.pkl")

    # 2. Setup Hardcore Environment
    # Note: We removed VecFrameStack because your saved model didn't use it!
    # Distinct names are used so the factory lambda never closes over a
    # variable that is rebound afterwards.
    base_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    monitored_env = Monitor(base_env, run_log_dir)
    env = DummyVecEnv([lambda: monitored_env])

    # Everything that uses the training env lives inside try/finally so the
    # env is released on early returns and on unexpected errors as well.
    try:
        # 3. LOAD THE GLASSES (Normalization Stats)
        # We load the stats from the old run so the robot sees the world correctly immediately
        print(f"👓 Loading normalization stats from: {PRETRAINED_STATS_PATH}")
        try:
            env = VecNormalize.load(PRETRAINED_STATS_PATH, env)
            # We turn training ON so it can adapt its eyes to the new Hardcore obstacles
            env.training = True
            env.norm_reward = True
        except FileNotFoundError:
            print("❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).")
            env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.)

        # 4. LOAD THE VETERAN BRAIN
        print(f"🧠 Loading brain weights from: {PRETRAINED_MODEL_PATH}")

        try:
            model = PPO.load(
                PRETRAINED_MODEL_PATH,
                env=env,
                # ⬇️ Keep low to preserve walking skill
                learning_rate=0.0001,
                # ⬇️ Good for long-term survival
                gamma=0.999,
                # ⬇️ INCREASED: Force it to try new things (stop freezing!)
                ent_coef=0.003,
                # ⬇️ INCREASED: Allow bigger mental adjustments
                clip_range=0.3
            )
        except FileNotFoundError:
            print(f"❌ ERROR: Model not found at {PRETRAINED_MODEL_PATH}")
            return
        except ValueError as e:
            print(f"❌ CRITICAL ERROR: Shape mismatch! Did you use FrameStack in one run but not the other?\n{e}")
            return

        # 5. Checkpoint Callback (Save every 100k steps in case it crashes)
        # The policy is trained on normalised observations, so each checkpoint
        # also stores the VecNormalize statistics; weights alone could not be
        # resumed correctly after a crash.
        checkpoint_callback = CheckpointCallback(
            save_freq=100_000,
            save_path=models_dir,
            name_prefix=f"{EXP_NAME}_ckpt",
            save_vecnormalize=True,
        )

        print(f"⏳ Transfer training for {STEPS} steps...")
        model.learn(total_timesteps=STEPS, callback=checkpoint_callback)

        # 6. Save Final
        model.save(os.path.join(run_log_dir, "final_hardcore_model"))
        env.save(final_stats_path)
    finally:
        env.close()

    # --- EVALUATION ---
    print("🎥 Recording result...")
    raw_eval_env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="rgb_array")
    eval_env = DummyVecEnv([lambda: raw_eval_env])

    frames = []
    total_reward = 0.0
    try:
        # Load the NEW stats we just learned in Hardcore
        eval_env = VecNormalize.load(final_stats_path, eval_env)
        # Freeze the running statistics and report raw (un-normalised) rewards.
        eval_env.training = False
        eval_env.norm_reward = False

        obs = eval_env.reset()
        episode_over = False
        while not episode_over:
            frames.append(eval_env.render())
            action, _ = model.predict(obs, deterministic=True)
            # The vectorised env returns one entry per sub-env; there is exactly one here.
            obs, rewards, dones, _ = eval_env.step(action)
            total_reward += float(rewards[0])
            episode_over = bool(dones[0])
    finally:
        eval_env.close()

    video_path = os.path.join(VIDEO_DIR, f"{EXP_NAME}.mp4")
    imageio.mimsave(video_path, frames, fps=30)
    print(f"🏆 Final Score: {total_reward:.2f}")
    try:
        display(Video(video_path, embed=True, html_attributes="controls autoplay loop", width=400))
    except Exception as exc:
        # Inline display is best-effort; the video file has already been written.
        print(f"⚠️ Could not display video inline ({exc}). Saved at: {video_path}")

# Entry point for this cell: a notebook kernel runs cells with
# __name__ == "__main__", so executing the cell starts the experiment
# immediately (the captured output below is from that call).
if __name__ == "__main__":
    run_transfer_learning()


🎓 STARTING TRANSFER LEARNING: Normal -> Hardcore
   Loading './logs/1_baseline_small_brain/final_model.zip'...
👓 Loading normalization stats from: ./logs/1_baseline_small_brain/vec_normalize.pkl
❌ Error: vec_normalize.pkl not found. Creating fresh normalization (Results may be worse initially).
🧠 Loading brain weights from: ./logs/1_baseline_small_brain/final_model.zip
❌ ERROR: Model not found at ./logs/1_baseline_small_brain/final_model.zip
