In [10]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from src.mine_evac_env import MineEvacEnv
import os

# 项目根目录
project_root = os.path.dirname(os.path.abspath("./mineEvac-python"))
layout_path = os.path.join(project_root, "layout", "baseline.json")

def make_env():
    return MineEvacEnv(layout_path=layout_path, max_steps=500)

# 向量化环境（多进程并行）
env = make_vec_env(make_env, n_envs=4)

model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
)

model.learn(total_timesteps=100_000)

model.save(os.path.join(project_root, "models", "ppo_mine_evac_baseline"))
env.close()


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 500      |
|    ep_rew_mean     | 319      |
| time/              |          |
|    fps             | 13779    |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 500         |
|    ep_rew_mean          | 319         |
| time/                   |             |
|    fps                  | 6373        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.009200614 |
|    clip_fraction        | 0.0307      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.6        |
|    explained_variance   | -0.00177    |
|    learning

In [2]:
import os
from src.mine_evac_env import MineEvacEnv

project_root = os.path.dirname(os.path.abspath("./mineEvac-python"))
layout_path = os.path.join(project_root, "layout", "baseline.json")
env = MineEvacEnv(layout_path=layout_path, max_steps=50)

obs, info = env.reset()
print("obs shape:", obs.shape)
print("obs:", obs)

for t in range(5):
    action = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(action)
    print(f"t={t}, action={action}, reward={reward}, terminated={terminated}, truncated={truncated}")
    if terminated or truncated:
        break


obs shape: (9,)
obs: [0.01123596 0.42857143 0.         0.         0.         0.
 0.         0.         0.        ]
t=0, action=4, reward=-1.0, terminated=False, truncated=False
t=1, action=1, reward=-1.0, terminated=False, truncated=False
t=2, action=0, reward=-1.0, terminated=False, truncated=False
t=3, action=0, reward=-1.0, terminated=False, truncated=False
t=4, action=0, reward=-1.0, terminated=False, truncated=False


In [9]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import os

from src.mine_evac_env import MineEvacEnv

project_root = os.path.dirname(os.path.abspath("./mineEvac-python"))
layout_path = os.path.join(project_root, "layout", "baseline.json")

def make_env():
    return MineEvacEnv(layout_path=layout_path, max_steps=100)

# 先用 1 个 env，调试更直观
env = make_vec_env(make_env, n_envs=1)

model = PPO(
    "MlpPolicy",
    env,
    verbose=1,        # 一定要 verbose=1 才有 stdout log
)

# 把训练步数调得超级小，先确认 log 是否正常打印
model.learn(total_timesteps=2000)

env.close()


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | -0.9     |
| time/              |          |
|    fps             | 4632     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------


In [6]:
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
import os
from src.mine_evac_env import MineEvacEnv

project_root = os.path.dirname(os.path.abspath("./mineEvac-python"))
layout_path = os.path.join(project_root, "layout", "baseline.json")

def make_env():
    return MineEvacEnv(layout_path=layout_path, max_steps=100)

env = make_vec_env(make_env, n_envs=1)

model = PPO("MlpPolicy", env, verbose=1)

model.learn(total_timesteps=2000)

env.close()


Using cpu device
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | -0.9     |
| time/              |          |
|    fps             | 3262     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------


In [7]:
obs, info = env.reset()
for t in range(5):
    a = env.action_space.sample()
    obs, reward, terminated, truncated, info = env.step(a)
    if terminated or truncated:
        break


ValueError: not enough values to unpack (expected 2, got 1)

In [16]:


import os
from src.mine_evac_env import debug_plot_baseline_layout

project_root = os.path.dirname(os.path.abspath("./mineEvac-python"))
layout_path = os.path.join(project_root, "layout", "baseline.json")

debug_plot_baseline_layout(layout_path)


ImportError: cannot import name 'debug_plot_baseline_layout' from 'src.mine_evac_env' (/Users/szy/Library/Mobile Documents/com~apple~CloudDocs/2025/HiMCM-2025/HiMCMing/SGJ/mineEvac-python/src/mine_evac_env.py)

In [None]:
import gymnasium as gym
from stable_baselines3 import PPO

# 创建环境
env = gym.make("CartPole-v1")

# 创建模型
model = PPO("MlpPolicy", env, verbose=1)

# 训练模型
model.learn(total_timesteps=10000)

# 保存与加载
model.save("../models/ppo_cartpole")
model = PPO.load("../models/ppo_cartpole", env=env)

# 预测动作
obs, _ = env.reset()
action, _ = model.predict(obs, deterministic=True)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 20.9     |
|    ep_rew_mean     | 20.9     |
| time/              |          |
|    fps             | 5041     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 27.6       |
|    ep_rew_mean          | 27.6       |
| time/                   |            |
|    fps                  | 3284       |
|    iterations           | 2          |
|    time_elapsed         | 1          |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.00923134 |
|    clip_fraction        | 0.111      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.68

In [18]:
const mineflayer = require('mineflayer')
const mineflayerViewer = require('prismarine-viewer').mineflayer

const bot = mineflayer.createBot({
  username: 'Bot'
})

bot.once('spawn', () => {
  mineflayerViewer(bot, { port: 3000 }) // Start the viewing server on port 3000

  // Draw the path followed by the bot
  const path = [bot.entity.position.clone()]
  bot.on('move', () => {
    if (path[path.length - 1].distanceTo(bot.entity.position) > 1) {
      path.push(bot.entity.position.clone())
      bot.viewer.drawLine('path', path)
    }
  })
})

SyntaxError: invalid syntax (2228020204.py, line 1)