In [1]:
import random
import collections
import heapq
from collections import deque
from typing import Tuple, List

def generate_branchy_maze(
    n: int,
    branchiness: float = 0.8,
    farthest_goal: bool = True
) -> Tuple[Tuple[int, int], Tuple[int, int], List[List[int]]]:
    """
    Randomly generate a perfect maze with tunable branching, plus a start
    and a goal cell.

    The maze is carved with a Growing-Tree loop over the cells whose two
    coordinates are both even; the cells in between act as the walls that
    get knocked down. For an even ``n`` the last row and column are
    therefore never carved and stay walls.

    All randomness comes from the global ``random`` module, so calling
    ``random.seed(...)`` beforehand makes the result reproducible.

    Parameters
    ----------
    n             : side length of the square grid in cells (must be >= 1)
    branchiness   : chance of growing from a random active cell instead of
                    the newest one; ~0 behaves like DFS, ~1 like Prim.
                    Values outside [0, 1] behave like the nearest bound.
    farthest_goal : True  -> goal is the open cell farthest (BFS distance)
                             from start;
                    False -> goal is a random open cell other than start.

    Returns
    -------
    (start, goal, maze) where maze[i][j] == 0 is a path and 1 is a wall.
    When the grid has a single open cell (n <= 2) goal equals start.

    Raises
    ------
    ValueError : if n < 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")

    # ---------- initialisation: everything starts as wall ----------
    maze = [[1] * n for _ in range(n)]

    def rand_even(limit):
        # Uniform pick among 0, 2, 4, ... < limit.
        return random.randrange(0, limit, 2)

    # An even start keeps every 2-step neighbour aligned on the even lattice.
    start = (rand_even(n), rand_even(n))

    # ---------- Growing-Tree main loop ----------
    def neighbors(x, y):
        # Yields (neighbour x, neighbour y, wall dx, wall dy) for each
        # lattice neighbour two cells away that lies inside the grid.
        for dx, dy in [(0, 2), (2, 0), (0, -2), (-2, 0)]:
            nx, ny = x + dx, y + dy
            if 0 <= nx < n and 0 <= ny < n:
                yield nx, ny, dx // 2, dy // 2

    maze[start[0]][start[1]] = 0
    active = [start]

    while active:
        # Newest cell (DFS-like) or a random active cell (Prim-like).
        idx = -1 if random.random() > branchiness else random.randrange(len(active))
        x, y = active[idx]

        unvisited = [(nx, ny, wx, wy) for nx, ny, wx, wy in neighbors(x, y)
                     if maze[nx][ny] == 1]

        if unvisited:
            nx, ny, wx, wy = random.choice(unvisited)
            maze[x + wx][y + wy] = 0         # knock down the wall in between
            maze[nx][ny] = 0
            active.append((nx, ny))
        else:
            active.pop(idx)                  # dead end: retire this cell

    # ---------- goal selection ----------
    def bfs_farthest(src):
        """BFS over open cells; return the first cell found at maximum distance from src."""
        vis = {src}
        q = deque([(src[0], src[1], 0)])
        far, far_dist = src, 0
        while q:
            x, y, d = q.popleft()
            if d > far_dist:
                far, far_dist = (x, y), d
            for dx, dy in [(0, 1), (1, 0), (0, -1), (-1, 0)]:
                nx, ny = x + dx, y + dy
                if 0 <= nx < n and 0 <= ny < n and maze[nx][ny] == 0 and (nx, ny) not in vis:
                    vis.add((nx, ny))
                    q.append((nx, ny, d + 1))
        return far

    if farthest_goal:
        goal = bfs_farthest(start)
    else:
        path_cells = [(i, j) for i in range(n) for j in range(n)
                      if maze[i][j] == 0 and (i, j) != start]
        # With n <= 2 the start is the only open cell; fall back to it
        # instead of letting random.choice fail on an empty list.
        goal = random.choice(path_cells) if path_cells else start

    # start and goal are open cells by construction, so no fix-up is needed.
    return start, goal, maze


# ────────────────────────── 2. 工具函数 ──────────────────────────
# Neighbour offsets, in the order the author labels left, right, up, down.
DIRS = [(0, -1), (0, 1), (-1, 0), (1, 0)]
def to1(p):
    """Format a 0-based coordinate pair as the 1-based string "(x, y)"."""
    first, second = p[0] + 1, p[1] + 1
    return "({}, {})".format(first, second)

def observation_str(pos, maze, goal):
    """
    Describe the four cells around `pos` as one "; "-separated string.

    Each entry reads "(x, y): state" with 1-based coordinates, visiting the
    neighbours in DIRS order. The state is "exit" for the goal cell, "path"
    for an open cell (maze value 0), and "wall" for anything else, including
    cells outside the grid. Bounds are checked against len(maze) on both axes.
    """
    size = len(maze)
    row, col = pos

    def describe(cell):
        # Classify a single neighbour; out-of-grid cells count as walls.
        r, c = cell
        if not (0 <= r < size and 0 <= c < size):
            return "wall"
        if cell == goal:
            return "exit"
        return "wall" if maze[r][c] != 0 else "path"

    around = [(row + d_row, col + d_col) for d_row, d_col in DIRS]
    return "; ".join(
        f"({r + 1}, {c + 1}): {describe((r, c))}" for r, c in around
    )

# 已知网格内的 BFS 最短路（返回 deque，空表示不可达或就在原地）
def shortest_path(src, dst, walkable):
    """
    Breadth-first shortest path from `src` to `dst` over the cells in `walkable`.

    Returns a deque of the cells to step on, excluding `src` and ending with
    `dst`. The deque is empty when `src == dst` or when `dst` cannot be
    reached through `walkable` cells using DIRS moves.
    """
    route = collections.deque()
    if src == dst:
        return route

    came_from = {src: None}            # cell -> predecessor on the BFS tree
    pending = collections.deque([src])
    while pending:
        here = pending.popleft()
        if here == dst:
            break
        for d_row, d_col in DIRS:
            there = (here[0] + d_row, here[1] + d_col)
            if there in came_from or there not in walkable:
                continue
            came_from[there] = here
            pending.append(there)

    if dst not in came_from:
        return route

    # Walk the predecessor chain backwards, prepending as we go.
    step = dst
    while step != src:
        route.appendleft(step)
        step = came_from[step]
    return route


# ────────────────────────── 4. DEMO ──────────────────────────
if __name__ == "__main__":
    # Build one 7x7 sample maze (or substitute your own) and dump it row by row.
    start, goal, maze = generate_branchy_maze(7)
    print(f"Start: {start}")
    print(f"End: {goal}")
    for row in maze:
        print(row)


Start: (0, 2)
End: (6, 2)
[0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 1, 0, 1, 0]
[0, 1, 0, 1, 0, 1, 0]
[0, 1, 0, 1, 1, 1, 0]
[0, 1, 0, 0, 0, 1, 0]
[0, 1, 1, 1, 1, 1, 0]
[0, 1, 0, 0, 0, 0, 0]


In [None]:
def build_chat_history(maze, start, goal):
    """
    Simulate an agent exploring `maze` from `start` until it reaches `goal`
    and return the transcript as a chat-style `messages` list.

    The list alternates
        {"role": "user",      "content": <observation of the 4 neighbours>}
        {"role": "assistant", "content": <1-based "(x, y)" cell moved to>}
    starting with the observation at `start`. No observation is emitted
    after the move that lands on `goal`.

    The agent only knows cells it has observed. While the goal is unknown
    it walks to the known-but-unexplored cell with the smallest Manhattan
    distance to the goal; once the goal has been seen it walks straight to
    it along the shortest known path.

    Parameters
    ----------
    maze  : grid of rows where 0 is a path and anything else is a wall.
            Column bounds are taken from the first row, so rectangular
            grids are accepted.
    start : (row, col) 0-based starting cell.
    goal  : (row, col) 0-based goal cell.

    Raises
    ------
    RuntimeError : if the goal has not been reached and no frontier cell
                   yields a non-empty path.
    """
    n_rows = len(maze)
    n_cols = len(maze[0]) if maze else 0

    def in_bounds(cell):
        # Separate row / column limits so non-square grids work too.
        return 0 <= cell[0] < n_rows and 0 <= cell[1] < n_cols

    def obs_content(pos):
        # One "(x, y): state" entry per neighbour, in DIRS order, 1-based.
        x, y = pos
        parts = []
        for dx, dy in DIRS:
            cell = (x + dx, y + dy)
            if not in_bounds(cell):
                state = "wall"               # outside the grid counts as wall
            elif cell == goal:
                state = "exit"
            elif maze[cell[0]][cell[1]] == 0:
                state = "path"
            else:
                state = "wall"
            parts.append(f"{to1(cell)}: {state}")
        return ", ".join(parts)              # comma-separated

    # ---------- incremental mapping ----------
    known = {start}                  # walkable cells observed so far
    frontier = []                    # min-heap of (Manhattan distance to goal, cell)
    task = collections.deque()       # remaining steps of the current plan
    pos = start
    messages = [{"role": "user", "content": obs_content(pos)}]

    def push_frontier(cell):
        h = abs(cell[0] - goal[0]) + abs(cell[1] - goal[1])
        heapq.heappush(frontier, (h, cell))

    while True:
        # 1) Fold the current observation into the known map.
        for dx, dy in DIRS:
            cell = (pos[0] + dx, pos[1] + dy)
            if cell in known or not in_bounds(cell):
                continue
            if maze[cell[0]][cell[1]] == 0 or cell == goal:
                known.add(cell)
                push_frontier(cell)

        # 2) Done once we stand on the goal.
        if pos == goal:
            break

        # 3) No plan left: head for the goal if seen, else the best frontier cell.
        if not task:
            if goal in known:
                task = shortest_path(pos, goal, known)
            while not task and frontier:
                # Entries yielding an empty path (e.g. the current cell) are skipped.
                _, tgt = heapq.heappop(frontier)
                task = shortest_path(pos, tgt, known)
            if not task:
                raise RuntimeError("No reachable target")

        # 4) Execute one step of the plan.
        nxt = task.popleft()
        messages.append({"role": "assistant", "content": to1(nxt)})
        pos = nxt
        if pos != goal:                      # no observation after reaching the goal
            messages.append({"role": "user", "content": obs_content(pos)})

    return messages


# Build the transcript for the current module-level maze / start / goal.
msgs = build_chat_history(maze, start, goal)

import json, pprint
pprint.pprint(msgs)                 # inspect the message structure directly


In [3]:
# Dataset generation parameters (tune here rather than inside the loop).
SEED = 0
N_MAZES = 100_000
MAZE_SIZE = 7

# generate_branchy_maze draws from the global `random` module; seeding it
# makes a re-run of this cell reproduce exactly the same transcripts.
random.seed(SEED)

msgs = []
for i in range(N_MAZES):
    start, goal, maze = generate_branchy_maze(MAZE_SIZE)
    msg = build_chat_history(maze, start, goal)
    msgs.append(msg)

In [10]:
# Spot-check the first generated transcript (a list of role/content dicts).
print(msgs[0])

[{'role': 'user', 'content': '(1, 0): wall, (1, 2): path, (0, 1): wall, (2, 1): path'}, {'role': 'assistant', 'content': '(1, 2)'}, {'role': 'user', 'content': '(1, 1): path, (1, 3): path, (0, 2): wall, (2, 2): wall'}, {'role': 'assistant', 'content': '(1, 3)'}, {'role': 'user', 'content': '(1, 2): path, (1, 4): path, (0, 3): wall, (2, 3): path'}, {'role': 'assistant', 'content': '(1, 4)'}, {'role': 'user', 'content': '(1, 3): path, (1, 5): path, (0, 4): wall, (2, 4): wall'}, {'role': 'assistant', 'content': '(1, 5)'}, {'role': 'user', 'content': '(1, 4): path, (1, 6): path, (0, 5): wall, (2, 5): wall'}, {'role': 'assistant', 'content': '(1, 6)'}, {'role': 'user', 'content': '(1, 5): path, (1, 7): path, (0, 6): wall, (2, 6): wall'}, {'role': 'assistant', 'content': '(1, 7)'}, {'role': 'user', 'content': '(1, 6): path, (1, 8): wall, (0, 7): wall, (2, 7): wall'}, {'role': 'assistant', 'content': '(1, 6)'}, {'role': 'user', 'content': '(1, 5): path, (1, 7): path, (0, 6): wall, (2, 6): wal

In [23]:
import pandas as pd
from datasets import Dataset, Features, Sequence, Value

# msgs is assumed to be a list[list[dict]]: one chat transcript per row.
df = pd.DataFrame({"chat": msgs})


# DataFrame -> Dataset
ds = Dataset.from_pandas(df)

In [4]:
import pandas as pd

# msgs is assumed to look like
#   [ [ {"role": "user", "content": "Hi"}, ... ],          # transcript 0
#     [ {"role": "assistant", "content": "Hello"}, ... ], ... ]

# Wrap every transcript as {'chat': <list[dict]>} and store the wrappers in
# a single `extra_info` column.
rows = [dict(chat=transcript) for transcript in msgs]
df = pd.DataFrame(dict(extra_info=rows))
df.to_parquet(
    '/projectnb/rlhf/mingyuc/DisCO/datasets/maze/train100000.parquet',
    index=False,          # do not write the row index
    engine='pyarrow',     # write through Arrow
)


In [5]:
# Show the single-column (`extra_info`) frame that was written to parquet.
print(df)

                                              extra_info
0      {'chat': [{'role': 'user', 'content': '(5, 4):...
1      {'chat': [{'role': 'user', 'content': '(1, 4):...
2      {'chat': [{'role': 'user', 'content': '(7, 6):...
3      {'chat': [{'role': 'user', 'content': '(1, 4):...
4      {'chat': [{'role': 'user', 'content': '(5, 0):...
...                                                  ...
99995  {'chat': [{'role': 'user', 'content': '(5, 0):...
99996  {'chat': [{'role': 'user', 'content': '(5, 0):...
99997  {'chat': [{'role': 'user', 'content': '(3, 6):...
99998  {'chat': [{'role': 'user', 'content': '(1, 6):...
99999  {'chat': [{'role': 'user', 'content': '(1, 6):...

[100000 rows x 1 columns]


In [24]:
# Summarise the Dataset (feature names and row count).
# NOTE(review): the recorded output reports 10000 rows while the generation
# loop runs 100000 iterations - looks like a stale out-of-order run; confirm
# by re-running the notebook top to bottom.
print(ds)

Dataset({
    features: ['chat'],
    num_rows: 10000
})


In [13]:
# Upload the dataset to the "MYC081/maze-sft" dataset repo on the Hugging Face Hub.
ds.push_to_hub(
    "MYC081/maze-sft"
)

Creating parquet from Arrow format: 100%|██████████| 10/10 [00:00<00:00, 163.76ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]


CommitInfo(commit_url='https://huggingface.co/datasets/MYC081/maze-sft/commit/9f8ec14685a0d56b833a681f22ae2f56e7640e2d', commit_message='Upload dataset', commit_description='', oid='9f8ec14685a0d56b833a681f22ae2f56e7640e2d', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/MYC081/maze-sft', endpoint='https://huggingface.co', repo_type='dataset', repo_id='MYC081/maze-sft'), pr_revision=None, pr_num=None)

In [1]:
import pandas as pd

# Requires: pip install -U pandas pyarrow
# Load a reference parquet file (the path suggests the GSM8K train split) to
# inspect how its `extra_info` column is laid out.
# NOTE(review): this rebinds `df`, replacing the maze frame built earlier.
df = pd.read_parquet("/projectnb/rlhf/mingyuc/TinyZero/TinyZero_old/data/gsm8k/train.parquet")   # per the original note, an S3 path works the same way
print(df['extra_info'])



0       {'answer': 'Natalia sold 48/2 = <<48/2=24>>24 ...
1       {'answer': 'Weng earns 12/60 = $<<12/60=0.2>>0...
2       {'answer': 'In the beginning, Betty has only 1...
3       {'answer': 'Maila read 12 x 2 = <<12*2=24>>24 ...
4       {'answer': 'He writes each friend 3*2=<<3*2=6>...
                              ...                        
7468    {'answer': 'For the distance she traveled, Eli...
7469    {'answer': 'He makes $.5 profit on each bracel...
7470    {'answer': 'Tony can skip at twice the speed t...
7471    {'answer': 'Janet needs 35 lunches for the kid...
7472    {'answer': 'If Anika is 30 now, in 15 years, s...
Name: extra_info, Length: 7473, dtype: object
