In [14]:
import os
import sys
import numpy as np
import pandas as pd
print(os.getcwd())              # see where we are
print(os.listdir(".."))         # see parent contents
print(os.listdir("../models"))  # see what's inside models
# (optional, but better to run once in a separate cell)
!pip install "stable-baselines3[extra]"

from stable_baselines3 import PPO

# If this notebook is inside evaluation/, go up one level to see env/.
# The membership check keeps the cell idempotent: re-running it does not
# append duplicate entries to sys.path.
_project_root = os.path.abspath("..")
if _project_root not in sys.path:
    sys.path.append(_project_root)

from env.stardew_mine_env import StardewMineEnv
from evaluation.visualization_tools import summarize_metrics, plot_hist, plot_scatter


/Users/mingkunliu/Downloads/175/Strawdew_Valley_Mining_Bot/evaluation
['agents', 'models', '__pycache__', 'README.md', 'env', 'results', 'evaluation', '.git', 'Mining_Bot.ipynb', 'environment.ipynb', 'train_strawdew_ppo.ipynb']
['ppo_model.zip']


In [7]:
class LoggingStardewEnv(StardewMineEnv):
    """
    StardewMineEnv subclass that tracks episode-level metrics for evaluation.

    Does NOT change the observation or action space. When an episode ends
    (``terminated`` or ``truncated``), ``step`` stores a summary dict under
    ``info["episode_metrics"]`` with the keys ``total_ore_value``,
    ``energy_used``, ``energy_efficiency``, ``collapse``,
    ``unique_tiles_visited``, ``ores_mined``, ``rocks_mined``,
    ``ore_to_rock_ratio`` and ``max_floor``.

    Actions may be given as Python ints, NumPy scalars or size-1 NumPy arrays
    (the form returned by ``model.predict``); they are coerced to ``int``
    before use.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base env, then zero the counters."""
        super().__init__(*args, **kwargs)
        self._reset_episode_metrics()

    @staticmethod
    def _as_int_action(action) -> int:
        """Coerce an int, NumPy scalar or size-1 array action to a Python int."""
        # A 0-d ndarray is unhashable, so it cannot be used as a dict key in
        # `_action_to_direction.get(...)`; `.item()` unwraps any size-1 array.
        return int(np.asarray(action).item())

    def _reset_episode_metrics(self):
        """Reset every per-episode counter to its initial value."""
        self.episode_ore_value = 0           # total ore value (here ore_value = 1 per ore)
        # NOTE(review): assumes the base env starts each episode at MAX_ENERGY — confirm.
        self.episode_energy_start = self.MAX_ENERGY
        self.episode_steps = 0
        self.episode_collapsed = 0          # 1 if ended due to energy <= 0
        self.episode_unique_tiles = set()   # (floor, x, y)
        self.episode_ores_mined = 0
        self.episode_rocks_mined = 0
        self.episode_max_floor = 0

    def reset(self, seed=None, options=None):
        """
        Reset the base env, clear the counters, and record the start tile.

        Returns the ``(obs, info)`` pair produced by the base ``reset``.
        """
        obs, info = super().reset(seed=seed, options=options)
        self._reset_episode_metrics()

        # The starting position counts as a visited tile.
        ax, ay = int(self.agent_location[0]), int(self.agent_location[1])
        self.episode_unique_tiles.add((int(self.floor), ax, ay))
        self.episode_max_floor = int(self.floor)

        return obs, info

    def _mine_tile(self, action: int):
        """
        Mine the tile adjacent to the agent and update ore / rock counters.

        Per the original author's note this is a copy of the base
        ``_mine_tile`` with counters added (the base implementation is not
        visible here — keep the two in sync).

        One unit of energy is spent even when the target is out of bounds.

        Returns:
            float reward: +1 for ORE, -1 for an EMPTY tile or an out-of-bounds
            target, -0.01 for any other tile.
        """
        reward = 0.0
        self.energy -= 1

        # Mining actions are offset by 8 relative to the direction table.
        dir_idx = self._as_int_action(action) - 8
        direction = self._action_to_direction.get(dir_idx, np.array([0, 0]))
        tx = int(self.agent_location[0] + direction[0])
        ty = int(self.agent_location[1] + direction[1])

        if not (0 <= tx < self.SIZE and 0 <= ty < self.SIZE):
            # out of bounds
            return reward - 1

        # The grid is indexed [row, column], i.e. [y, x].
        tile = self.grid[ty, tx]

        if tile == self.ORE:
            reward += 1
            self.episode_ores_mined += 1
            self.episode_ore_value += 1  # ore_value = 1 in this env
        elif tile == self.EMPTY:
            reward -= 1
        else:
            # WEED or ROCK or other non-empty
            reward -= 0.01
            # Weeds are counted together with rocks; other tile types
            # (e.g. the ladder) are penalised but not counted.
            if tile == self.ROCK or tile == self.WEED:
                self.episode_rocks_mined += 1

        # clear tile
        self.grid[ty, tx] = self.EMPTY

        # restore ladder if needed
        if self._ladder_location is not None and (tx, ty) == self._ladder_location:
            self.grid[ty, tx] = self.LADDER

        return reward

    def _finalize_episode_metrics(self) -> dict:
        """Set the collapse flag and build the end-of-episode summary dict."""
        # energy collapse vs grid-empty
        # (base env sets terminated in both cases; we distinguish by energy)
        self.episode_collapsed = int(self.energy <= 0)

        energy_used = self.episode_energy_start - self.energy
        if energy_used <= 0:
            # avoid divide-by-zero; the reported energy_used is clamped to 1 too
            energy_used = 1

        energy_efficiency = self.episode_ore_value / energy_used

        if self.episode_rocks_mined > 0:
            ore_to_rock_ratio = self.episode_ores_mined / self.episode_rocks_mined
        else:
            # Undefined when no rock/weed was mined.
            ore_to_rock_ratio = np.nan

        return {
            "total_ore_value": float(self.episode_ore_value),
            "energy_used": float(energy_used),
            "energy_efficiency": float(energy_efficiency),
            "collapse": int(self.episode_collapsed),
            "unique_tiles_visited": int(len(self.episode_unique_tiles)),
            "ores_mined": int(self.episode_ores_mined),
            "rocks_mined": int(self.episode_rocks_mined),
            "ore_to_rock_ratio": float(ore_to_rock_ratio),
            "max_floor": int(self.episode_max_floor),
        }

    def step(self, action: int):
        """
        Use the base step() to update the env and get reward, then
        update our metrics and attach them to info when episode ends.

        Returns the base env's ``(obs, reward, terminated, truncated, info)``
        tuple; ``info`` gains an ``"episode_metrics"`` entry on the final step.
        """
        # Normalise NumPy actions (e.g. from model.predict) to a plain int.
        action = self._as_int_action(action)
        obs, reward, terminated, truncated, info = super().step(action)

        self.episode_steps += 1

        # track visited tiles after move
        ax, ay = int(self.agent_location[0]), int(self.agent_location[1])
        self.episode_unique_tiles.add((int(self.floor), ax, ay))
        self.episode_max_floor = max(self.episode_max_floor, int(self.floor))

        if terminated or truncated:
            info = info or {}
            info["episode_metrics"] = self._finalize_episode_metrics()

        return obs, reward, terminated, truncated, info


In [8]:
def run_evaluation(model_path: str, n_episodes: int = 50, seed=None, deterministic: bool = True):
    """
    Load a PPO model and evaluate it for n_episodes in the logging env.

    Args:
        model_path: Path to the saved PPO model, passed to ``PPO.load``.
        n_episodes: Number of evaluation episodes to run.
        seed: Optional integer base seed. When given, episode ``i`` is reset
            with ``seed + i`` so runs are repeatable. ``None`` (default)
            resets without a seed, as before.
        deterministic: Forwarded to ``model.predict``; defaults to ``True``.

    Returns:
        A pandas DataFrame with one row per episode: the env's
        ``episode_metrics`` plus ``episode`` (index) and ``total_reward``.

    Raises:
        ValueError: raised by ``model.predict`` when the observation returned
            by the env does not match the observation space the model was
            saved with (the model and env configuration must agree).
    """
    env = LoggingStardewEnv(size=10, max_floor=10, max_energy=100, local_view_size=5)
    model = PPO.load(model_path)

    all_metrics = []

    for ep in range(n_episodes):
        episode_seed = None if seed is None else seed + ep
        obs, info = env.reset(seed=episode_seed)
        done = False
        truncated = False
        ep_reward = 0.0

        while not (done or truncated):
            action, _ = model.predict(obs, deterministic=deterministic)
            # predict() returns a NumPy array; the env works with plain ints
            # (a 0-d array is unhashable and breaks its direction lookup).
            action = int(np.asarray(action).item())
            obs, reward, done, truncated, info = env.step(action)
            ep_reward += reward

        # The loop only exits on the final step, whose info carries the metrics.
        metrics = info.get("episode_metrics", {}).copy()
        metrics["episode"] = ep
        metrics["total_reward"] = float(ep_reward)
        all_metrics.append(metrics)

    return pd.DataFrame(all_metrics)


In [None]:
# Evaluate the saved PPO model for 50 episodes and preview the first rows of
# the per-episode metrics. The path is relative to the notebook's working
# directory; adjust it to point at your actual model file.
model_path = "../models/ppo_model.zip"  # or whatever your filename is
# Alternative checkpoint path, left disabled:
# model_path = "../models/ppo_miningbot_20251130_234440.zip/ppo_miningbot.zip"
# NOTE(review): the recorded run of this cell failed inside run_evaluation with
# "Unexpected observation shape () for Box environment" — the saved model's
# observation space apparently does not match what the env returns; confirm
# the model was trained on the same env configuration.
df = run_evaluation(model_path, n_episodes=50)
df.head()


ValueError: Error: Unexpected observation shape () for Box environment, please use (1,) or (n_env, 1) for the observation shape.

In [None]:
# Summarize the evaluation results. Requires `df` from the previous cell, so
# that cell must have completed successfully.
# NOTE(review): summarize_metrics / plot_hist / plot_scatter are imported from
# evaluation.visualization_tools; their exact output is not visible here.
summarize_metrics(df)

# Example plots: one histogram per selected per-episode metric column.
plot_hist(df, "total_ore_value")
plot_hist(df, "energy_efficiency")
plot_hist(df, "max_floor")

# Scatter plots of total ore value against energy used and tiles visited.
plot_scatter(df, "energy_used", "total_ore_value")
plot_scatter(df, "unique_tiles_visited", "total_ore_value")
