# W19D4 — Hyperparameter Optimization with Optuna

**Team:** [Your Team Name]  
**Runner:** [Name]  
**Date:** [Date]

---

## Objectives
1. Define a search space for PPO hyperparameters
2. Run an Optuna HPO sweep
3. Export leaderboard and best config
4. Select a ship candidate

---

## 1. Setup

In [None]:
# Install packages (uncomment if running in Colab)
# !pip install gymnasium stable-baselines3 optuna

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_vec_env
import optuna
from optuna.samplers import TPESampler
import pandas as pd
import json
import os
import warnings
# Ignore every warning category from here on so cell output stays uncluttered.
warnings.simplefilter("ignore")

# Show which Optuna release this notebook is running against.
print(f"Optuna version: {optuna.__version__}")

## 2. Configuration

Set your trial budget here. Start small in class (8-12 trials), then expand to 25+ trials for the Sunday deliverable.

In [None]:
# === CONFIGURATION ===
# Sweep budget. In-class: 8-12, Sunday: 25+.
N_TRIALS = 10
# Training steps per trial; kept short so the sweep stays fast.
TIMESTEPS_PER_TRIAL = 20_000
# Number of episodes averaged when scoring a trained policy.
N_EVAL_EPISODES = 5
# Base seed passed to the environment, the model and the sampler.
SEED = 42

# Directory that receives the sweep artifacts; created up front if missing.
RESULTS_DIR = "../results"
os.makedirs(RESULTS_DIR, exist_ok=True)

print(f"Running {N_TRIALS} trials, {TIMESTEPS_PER_TRIAL:,} timesteps each")

## 3. Define Search Space

The objective function defines:
1. Which hyperparameters to tune (search space)
2. How to train with those hyperparameters
3. What metric to return (for Optuna to optimize)

In [None]:
def objective(trial):
    """
    Optuna objective function for PPO hyperparameter tuning.

    Samples one PPO configuration, trains it on CartPole-v1 for
    TIMESTEPS_PER_TRIAL steps, then scores it with a deterministic
    evaluation over N_EVAL_EPISODES episodes.

    Args:
        trial: Optuna trial object that suggests hyperparameters

    Returns:
        float: Mean evaluation reward (higher is better), or 0.0 if
        training raised an exception.
    """

    # === SEARCH SPACE ===
    # Optuna will try different values within these ranges

    learning_rate = trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True)
    n_steps = trial.suggest_categorical("n_steps", [16, 32, 64, 128, 256, 512])
    gamma = trial.suggest_float("gamma", 0.9, 0.9999, log=True)
    ent_coef = trial.suggest_float("ent_coef", 1e-8, 0.1, log=True)
    clip_range = trial.suggest_float("clip_range", 0.1, 0.4)

    # === CREATE ENVIRONMENT ===
    env = make_vec_env("CartPole-v1", n_envs=1, seed=SEED)

    # Everything that uses the training env runs inside try/finally so the
    # env is closed on every exit path: failed training (early return),
    # an error while building the model, or an error during evaluation.
    try:
        # === CREATE MODEL ===
        model = PPO(
            "MlpPolicy",
            env,
            learning_rate=learning_rate,
            n_steps=n_steps,
            gamma=gamma,
            ent_coef=ent_coef,
            clip_range=clip_range,
            verbose=0,
            seed=SEED,
        )

        # === TRAIN ===
        try:
            model.learn(total_timesteps=TIMESTEPS_PER_TRIAL)
        except Exception as e:
            # If training fails, return a bad score instead of aborting
            # the whole sweep.
            print(f"Trial {trial.number} failed: {e}")
            return 0.0

        # === EVALUATE ===
        eval_env = gym.make("CartPole-v1")
        try:
            mean_reward, _ = evaluate_policy(
                model,
                eval_env,
                n_eval_episodes=N_EVAL_EPISODES,
                deterministic=True,
            )
        finally:
            eval_env.close()
    finally:
        env.close()

    # Hand Optuna a plain Python float rather than whatever numeric type
    # evaluate_policy produced.
    return float(mean_reward)

# Confirm the cell ran and list the tuned hyperparameters (one message per line).
print(
    "Objective function defined.",
    "Search space: learning_rate, n_steps, gamma, ent_coef, clip_range",
    sep="\n",
)

## 4. Run HPO Sweep

In [None]:
# Set up the study: the objective (mean reward) is maximized and the
# TPE sampler is seeded with SEED.
study = optuna.create_study(
    study_name="cartpole_ppo_hpo",
    sampler=TPESampler(seed=SEED),
    direction="maximize",
)

# Announce the sweep before the (potentially long) optimization cell runs.
print(
    f"Starting HPO sweep with {N_TRIALS} trials...",
    "This may take a while. Progress will be shown below.",
    "="*50,
    sep="\n",
)

In [None]:
# Launch the sweep: call `objective` N_TRIALS times with a progress bar.
study.optimize(objective, n_trials=N_TRIALS, show_progress_bar=True)

# Closing banner, printed one item per line.
print("\n" + "="*50, "HPO SWEEP COMPLETE", "="*50, sep="\n")

## 5. Export Results

In [None]:
# Create leaderboard DataFrame: one row per trial holding its number, its
# score and the hyperparameters it sampled.
# NOTE(review): trial.value may be None for trials that did not complete;
# such rows are expected to end up at the bottom after sorting - confirm.
trials_data = [
    {"trial": trial.number, "mean_reward": trial.value, **trial.params}
    for trial in study.trials
]

leaderboard = pd.DataFrame(trials_data)
# Use a stable sort so trials with identical rewards keep their run order,
# making the saved leaderboard deterministic when scores tie.
leaderboard = leaderboard.sort_values(
    "mean_reward", ascending=False, kind="mergesort"
)

# Save leaderboard
leaderboard_path = os.path.join(RESULTS_DIR, "hpo_leaderboard.csv")
leaderboard.to_csv(leaderboard_path, index=False)
print(f"✅ Saved: {leaderboard_path}")

# Display top 10
print("\nTop 10 Trials:")
print(leaderboard.head(10).to_string(index=False))

In [None]:
# Export best config: the winning trial's number, score and hyperparameters.
best_config = {
    "trial_number": study.best_trial.number,
    "mean_reward": study.best_value,
    "params": study.best_params
}

config_path = os.path.join(RESULTS_DIR, "best_config.json")
# Explicit encoding so the file is written the same way on every platform.
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(best_config, f, indent=2)
print(f"✅ Saved: {config_path}")

# Display best config
print("\n" + "="*50)
print("BEST CONFIGURATION")
print("="*50)
print(f"Trial: {study.best_trial.number}")
print(f"Mean Reward: {study.best_value:.2f}")
print("\nHyperparameters:")
for key, value in study.best_params.items():
    print(f"  {key}: {value}")
print("="*50)

## 6. Visualize Results (Optional)

In [None]:
# Plot optimization history
try:
    fig = optuna.visualization.plot_optimization_history(study)
    fig.show()
except Exception:
    # Catch Exception rather than using a bare except, so Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed.
    print("Visualization not available (install plotly for interactive plots)")
    # Fallback: simple matplotlib plot
    import matplotlib.pyplot as plt

    # Only plot trials that have a recorded value; a None value cannot be
    # drawn as a point.
    finished = [t for t in study.trials if t.value is not None]
    plt.figure(figsize=(10, 5))
    plt.plot([t.number for t in finished], [t.value for t in finished], 'o-')
    plt.xlabel("Trial")
    plt.ylabel("Mean Reward")
    plt.title("HPO Optimization History")
    plt.grid(True)
    plt.show()

In [None]:
# Plot parameter importances
try:
    fig = optuna.visualization.plot_param_importances(study)
    fig.show()
except Exception as exc:
    # Catch Exception rather than using a bare except, so Ctrl-C is not
    # swallowed, and report why the plot could not be produced.
    print("Parameter importance plot not available")
    print(f"  Reason: {exc}")

## 7. Summary for Ship Candidate Selection

Copy the output of the cell below into `docs/ship_candidate.md`.

In [None]:
# Print a copy-paste summary of the sweep: trial count, best trial, its
# score and hyperparameters, and where the artifacts were written.
print("\n📋 COPY FOR docs/ship_candidate.md:")
print("="*50)
print(f"Total trials: {len(study.trials)}")
print(f"Best trial: #{study.best_trial.number}")
print(f"Best mean reward: {study.best_value:.2f}")
print("\nBest config JSON:")
print(json.dumps(study.best_params, indent=2))
print("="*50)
print(f"\nLeaderboard saved to: {leaderboard_path}")
print(f"Best config saved to: {config_path}")

---

## Next Steps

### In-Class (W19D4)
1. ✅ Review leaderboard and select ship candidate
2. ✅ Complete `docs/ship_candidate.md`
3. ✅ Update `docs/runbook.md` with HPO run instructions

### After Class (Due Sunday)
1. ➡️ Runner: Expand sweep to 25+ trials (change N_TRIALS above)
2. ➡️ Each fellow: Write individual Google Doc reflection
3. ➡️ Update PR with final artifacts