# Random Baseline FrozenLake 

In [19]:
import gymnasium as gym
from pathlib import Path
from gymnasium.wrappers import RecordVideo
from datetime import datetime
from stable_baselines3.common.monitor import Monitor
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [20]:
docs_path = Path("../../documentation/frozenlake/random-baseline") # ../ makes it so it writes to a directory one back from current one

run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
run_path = docs_path / f"run_{run_id}"

video_dir = run_path / "videos"
graphs_dir  = run_path / "graphs"
report_file = run_path / "random_baseline.md"
monitor_dir = run_path / "monitor"

docs_path.mkdir(parents=True, exist_ok=True)
graphs_dir.mkdir(parents=True, exist_ok=True)
docs_path.mkdir(parents=True, exist_ok=True)
video_dir.mkdir(parents=True, exist_ok=True)
monitor_dir.mkdir(parents=True, exist_ok=True)

monitor_file = str(monitor_dir) #/ "monitor_log.csv"


In [21]:
def run_random_baseline_with_monitor(video_dir, monitor_file, num_episodes=500, seed=42):
    print("\nRunning Random Baseline on frozen lake logging rewards per episode...")

    monitor_path = Path(monitor_file)
    if monitor_path.is_dir():
        monitor_path = monitor_path

    env = gym.make("FrozenLake-v1",is_slippery = True, render_mode="rgb_array")
    env = Monitor(env, str(monitor_path))
    env = RecordVideo(
        env,
        video_folder=str(video_dir),
        episode_trigger=lambda e: e < 5,
        name_prefix="frozenlake_random_baseline"
    )
    # Reset environment
    observation, info = env.reset(seed=42)
    
    rewards = []
    for ep in range(num_episodes):
        obs, info = env.reset(seed=seed + ep)
        done = False
        total_reward = 0
        step = 0
        while not done:
            step += 1
            action = env.action_space.sample()
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            total_reward += reward

            print(f"Step {step}:")
            print(f"Action taken: {action} ({['Left','Down','Right','Up'][action]})")
            print(f"Reward: {reward}")
            print(f"Terminated: {terminated}, Truncated: {truncated}")
            print("-" * 50)
            
        rewards.append(total_reward)
        print(f"Episode {ep}: reward = {total_reward}")

        if ep == 42:
            log_episode = {
                "episode": ep,
                "seed": seed + ep,
                "action": action,
                "observation":observation,
                "reward":total_reward
            }

    env.close()
    print(f"Logged rewards to {monitor_path}")
    return np.mean(rewards), log_episode
    


In [22]:
def plot_random_baseline_curve(monitor_file, graphs_dir):
    monitor_path = Path(monitor_file)

    if monitor_path.is_dir():
        monitor_path = monitor_path/ "monitor.csv"

    if not monitor_path.exists():
        raise FileNotFoundError(f"No monitor file found at {monitor_path}")

    df = pd.read_csv(monitor_path, skiprows=1)
    plt.figure(figsize=(10, 6))
    plt.plot(df["r"], label="Reward per Episode", color="tab:red", alpha=0.7)
    plt.xlabel("Episodes")
    plt.ylabel("Total Reward")
    plt.title("Random Baseline Learning Curve (FrozenLake-v1)")
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.legend()
    out = Path(graphs_dir) / "random_baseline_curve.png"
    plt.savefig(out, dpi=200, bbox_inches="tight")
    plt.close()

    print(f"Random baseline curve saved → {out}")



In [23]:
env = gym.make("FrozenLake-v1",is_slippery = True, render_mode="rgb_array") 
env = RecordVideo(
    env,
    video_folder=str(video_dir),
    episode_trigger=lambda e: True,
    name_prefix="frozenlake_random_baseline" 
)

  logger.warn(


In [24]:

total_reward = 0
episode_over = False

In [25]:
average_reward,log_episode = run_random_baseline_with_monitor(video_dir, monitor_file, num_episodes=4500, seed=42)


Running Random Baseline on frozen lake logging rewards per episode...
Step 1:
Action taken: 2 (Right)
Reward: 0.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 2:
Action taken: 0 (Left)
Reward: 0.0
Terminated: True, Truncated: False
--------------------------------------------------
Episode 0: reward = 0.0
Step 1:
Action taken: 0 (Left)
Reward: 0.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 2:
Action taken: 3 (Up)
Reward: 0.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 3:
Action taken: 2 (Right)
Reward: 0.0
Terminated: False, Truncated: False
--------------------------------------------------
Step 4:
Action taken: 1 (Down)
Reward: 0.0
Terminated: True, Truncated: False
--------------------------------------------------
Episode 1: reward = 0.0
Step 1:
Action taken: 0 (Left)
Reward: 0.0
Terminated: False, Truncated: False
------------------------

In [26]:
obs_explanation = """\
**Observation (Discrete integer):**
- A single integer (0–15) representing the agent’s position on the 4×4 grid.
- Each cell in the grid corresponds to a unique state:
  - `S` — Start position (state 0)
  - `F` — Frozen tile (safe)
  - `H` — Hole (episode ends with no reward)
  - `G` — Goal (episode ends with +1 reward)

**Action Space (Discrete 4):**
- 0 → Left
- 1 → Down
- 2 → Right
- 3 → Up
"""


failure_conditions = """\
**Episode ends when (termination/truncation):**
- The agent **falls into a hole (`H`)** → `terminated = True`
- The agent **reaches the goal (`G`)** → `terminated = True`
- The **maximum number of steps (100)** is reached → `truncated = True`

**Reward Structure:**
- `+1` for reaching the goal (`G`)
- `0` for all other tiles (including holes)
"""


with open(report_file, "w")as f:
    f.write("## Environment Details\n")
    f.write(f"- Action space: {env.action_space}\n")
    f.write(f"- Observation space: {env.observation_space}\n")
    f.write(f"- Maximum steps per episode: {env.spec.max_episode_steps}\n\n")

    f.write("## Observation Meaning\n")
    f.write(obs_explanation + "\n")

    f.write("## Failure Conditions\n")
    f.write(failure_conditions + "\n")

    f.write("## Example Episode (From the 100 Runs)\n")
    f.write(f"- Episode: {log_episode['episode']}\n")
    f.write(f"- Seed: {log_episode['seed']}\n")
    f.write(f"- Observation: {log_episode['observation']}\n")
    f.write(f"- Total Reward: {log_episode['reward']}\n\n")

    f.write("## 500 episode Performance Summary:\n")
    f.write(f"- Average reward: {average_reward}\n\n")

In [27]:
plot_random_baseline_curve(monitor_file, graphs_dir)

Random baseline curve saved → ../../documentation/frozenlake/random-baseline/run_2025-10-28_10-28-46/graphs/random_baseline_curve.png
