# 📘 Word → CSV → YAML → RST → Sphinx(HTML/PDF) 全自动流水线 v4
本版本支持 **XML 层解析嵌套表格**，确保参数取值不会丢失；并在 RST 中渲染为内嵌表格。
将 `at-parameter-demo.docx` 放在与本 Notebook 同目录。

In [1]:
# Step 0 — 安装依赖（首次运行）
!pip install -q python-docx pandas pyyaml jinja2 sphinx sphinx_rtd_theme lxml

## Step 1 — Word → CSV（支持 XML 层嵌套表格解析）

In [15]:
import re
from lxml import etree

PARAM_HEADING_PAT = re.compile(r'^\s*参数(说明|表)?\s*[:：]?\s*$', re.I)
CMD_LINE_PAT = re.compile(r'^\s*(AT\S*?)\s*[:：]\s*(.*)$')

def is_param_heading(text: str) -> bool:
    return bool(PARAM_HEADING_PAT.match(text or ""))

def is_cmd_heading(text: str) -> bool:
    return bool(CMD_LINE_PAT.match(text or ""))

def cell_plain_text(cell):
    parts = [p.text.strip() for p in cell.paragraphs if p.text and p.text.strip()]
    return "\n".join(parts).strip()

def find_nested_tbls_in_cell(cell):
    """从 cell._tc.xml 里找所有嵌套 <w:tbl>"""
    xml_str = cell._tc.xml
    root = etree.fromstring(xml_str.encode("utf-8"))
    ns = {"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"}
    return root.findall(".//w:tbl", ns), ns

def tbl_rows_as_text(tbl, ns):
    """把 <w:tbl> 转成二维数组文本（每个单元格拼接全部 w:t）"""
    rows = []
    for r in tbl.findall(".//w:tr", ns):
        cells = r.findall(".//w:tc", ns)
        row = []
        for c in cells:
            t = "".join(tn.text for tn in c.iterfind(".//w:t", ns) if tn.text).strip()
            row.append(t)
        rows.append(row)
    return rows

def looks_like_header(row):
    hdr = " ".join(row[:3])
    return any(k in hdr for k in ["参数", "名称", "Name", "描述", "说明", "含义", "取值", "值", "value", "meaning"])

def nested_table_to_kv(rows):
    """通用：>=2 列 -> key=第1列；value=第2..N列合并"""
    if not rows:
        return {}
    start = 1 if looks_like_header(rows[0]) else 0
    kv = {}
    for r in rows[start:]:
        if not r:
            continue
        key = (r[0] or "").strip()
        val = " | ".join([c for c in r[1:] if c and c.strip()]) if len(r) > 1 else ""
        if key:
            kv[key] = val
    return kv

def cell_value_map_from_nested_table(cell):
    tbls, ns = find_nested_tbls_in_cell(cell)
    mapping = {}
    for t in tbls:
        rows = tbl_rows_as_text(t, ns)
        kv = nested_table_to_kv(rows)
        mapping.update(kv)
    return mapping

def parse_enum_map_fuzzy(text):
    """
    从纯文本中提取键值对，支持：
    0: 未注册 / 0：未注册 / 0 未注册 / 0 -115 dBm / 0→未注册 / 0 => 未注册
    多行或用 , ; ； 分隔均可。
    """
    if not text:
        return {}
    segs = re.split(r"[,\uFF0C;\uFF1B\n]+", text.strip())
    m = {}
    for s in segs:
        s = s.strip()
        if not s:
            continue
        # 先看冒号
        if ":" in s or "：" in s:
            k, v = re.split(r"[:：]", s, 1)
            k, v = k.strip(), v.strip()
        else:
            # 再看箭头/连字符/空格分隔
            m2 = re.match(r"^(\S+)\s*(?:->|→|=>|-|—|\s)\s*(.+)$", s)
            if m2:
                k, v = m2.group(1).strip(), m2.group(2).strip()
            else:
                # 只要以数字/枚举码开头 + 后面是中文/字母也收
                m3 = re.match(r"^([A-Za-z0-9\+\-\.]+)\s+(.+)$", s)
                if m3:
                    k, v = m3.group(1).strip(), m3.group(2).strip()
                else:
                    continue
        if k:
            m[k] = v
    return m


## Step 2 — CSV → YAML

In [12]:
import yaml, json
YAML_OUT = os.path.join(CSV_DIR, 'all_commands.yaml')

def csv_to_yaml(csv_path, yaml_path):
    df = pd.read_csv(csv_path, dtype=str).fillna('')
    cmd_objects = []
    for _, r in df.iterrows():
        params = json.loads(r['参数JSON']) if r['参数JSON'] else []
        cmd_objects.append({
            'command': r['命令'],
            'title': r['命令标题'],
            'type': [t.strip() for t in r['命令类型'].split(';') if t.strip()],
            'formats': [f.strip() for f in r['命令格式'].split('|') if f.strip()] or [r['命令格式']],
            'parameters': params,
            'examples': [
                {'cmd': c.strip(), 'resp': e.strip()}
                for c, e in zip((r['示例命令'] or '').split('|'), (r['示例响应'] or '').split('|'))
                if c.strip() or e.strip()
            ],
            'description': r.get('功能描述',''),
            'notes': r.get('备注','')
        })
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.safe_dump({'commands': cmd_objects}, f, allow_unicode=True, sort_keys=False)
    print(f'✅ 已生成 YAML → {yaml_path}')

csv_to_yaml(CSV_OUT, YAML_OUT)

✅ 已生成 YAML → data/all_commands.yaml


## Step 3 — YAML → RST（渲染嵌套 values 表格）

In [13]:
from jinja2 import Template
import yaml, os

RST_DIR = os.path.join('data', 'rst_output')
os.makedirs(RST_DIR, exist_ok=True)

RST_TMPL = Template('''
{{ cmd.command }}
{{ '=' * cmd.command|length }}

**Title**: {{ cmd.title }}
**Types**: {{ cmd.type|join(', ') }}

Formats::
{%- for f in cmd.formats %}
   {{ f }}
{%- endfor %}

Parameters
----------
.. list-table::
   :header-rows: 1
   :widths: 18 34 48

   * - Name
     - Description
     - Values
{%- for p in cmd.parameters %}
   * - {{ p.name }}
     - {{ p.desc or '—' }}
     - {%- if p.values is defined and p.values and p.values.items is defined %}
       
       .. list-table::
          :header-rows: 1
          :widths: 20 40

          * - Key
            - Value
{%- for k,v in p.values.items() %}
          * - {{ k }}
            - {{ v }}
{%- endfor %}
       {%- else %} N/A {%- endif %}
{%- endfor %}

Examples
--------
{%- for ex in cmd.examples %}
.. code-block:: none

   {{ ex.cmd }}
   {{ ex.resp }}
{%- endfor %}

**Description**: {{ cmd.description or '' }}

{%- if cmd.notes %}
**Notes**: {{ cmd.notes }}
{%- endif %}
''')

def yaml_to_rst(yaml_path, rst_dir):
    import pprint
    with open(yaml_path, 'r', encoding='utf-8') as f:
      data = yaml.safe_load(f)
    pprint.pprint(data)

    with open(yaml_path, 'r', encoding='utf-8') as f:
        data = yaml.safe_load(f)
    cmds = data.get('commands', [])
    # 强制确保每个 parameter 的 values 都是 dict（防止 None 或其他类型带来模板报错）
    for cmd in cmds:
        for p in cmd.get('parameters', []):
            if not isinstance(p.get('values', {}), dict):
                p['values'] = {}
    for cmd in cmds:
        rst_text = RST_TMPL.render(cmd=cmd)
        fname = f"{cmd['command']}.rst"
        with open(os.path.join(rst_dir, fname), 'w', encoding='utf-8') as fo:
            fo.write(rst_text)
    index_lines = ['AT Manual', '=========', '', '.. toctree::', '   :maxdepth: 1', '']
    for cmd in cmds:
        index_lines.append(f"   {cmd['command']}")
    with open(os.path.join(rst_dir, 'index.rst'), 'w', encoding='utf-8') as fo:
        fo.write('\n'.join(index_lines))
    print(f'✅ RST 已生成到 {rst_dir}')

# 用法示例
yaml_to_rst(YAML_OUT, RST_DIR)



{'commands': [{'command': 'ATI',
               'description': '获取模组厂商信息',
               'examples': [{'cmd': 'ATI', 'resp': ''}],
               'formats': ['ATI'],
               'notes': '',
               'parameters': [{'desc': '模组厂商信息、产品名称、版本号',
                               'name': '<manufacturer>',
                               'values': {}},
                              {'desc': '模组型号',
                               'name': '<module_version>',
                               'values': {}},
                              {'desc': '模组软件版本',
                               'name': '<soft_version>',
                               'values': {}}],
               'title': '获取模组厂商信息',
               'type': ['执行', '查询']},
              {'command': 'AT+GMR',
               'description': '查询版本信息',
               'examples': [{'cmd': 'AT+GMR', 'resp': ''}],
               'formats': ['AT+GMR'],
               'notes': '',
               'parameters': [{'desc': '模组软件版本信息',
            

## Step 4 — 初始化 Sphinx（存在则跳过）

In [5]:
import shutil, os
DOCS_DIR = 'docs'
if not os.path.exists(DOCS_DIR):
    !sphinx-quickstart {DOCS_DIR} --sep --project 'AT Command Manual' --author 'Doc Team' --release '1.0' -q
else:
    print('⚠️ docs/ 已存在，跳过 sphinx-quickstart 初始化。')

conf_py = os.path.join(DOCS_DIR, 'source', 'conf.py')
if os.path.exists(conf_py):
    with open(conf_py, 'a', encoding='utf-8') as f:
        f.write('\nhtml_theme = "sphinx_rtd_theme"\n')

shutil.copytree('data/rst_output', os.path.join(DOCS_DIR, 'source'), dirs_exist_ok=True)
print('✅ RST 已复制到 docs/source/')

⚠️ docs/ 已存在，跳过 sphinx-quickstart 初始化。
✅ RST 已复制到 docs/source/


## Step 5 — 构建 HTML

In [6]:
!make -C docs html
print('\n✅ 构建完成，打开：docs/build/html/index.html')

[01mRunning Sphinx v8.2.3[39;49;00m
[01mloading translations [en]... [39;49;00mdone
[01mloading pickled environment... [39;49;00mfailed: source directory has changed
done
[01mbuilding [mo]: [39;49;00mtargets for 0 po files that are out of date
[01mwriting output... [39;49;00m
[01mbuilding [html]: [39;49;00mtargets for 4 source files that are out of date
[01mupdating environment: [39;49;00m[new config] 4 added, 0 changed, 0 removed
[2K[01mreading sources... [39;49;00m[100%] [35mindex[39;49;00mm
[01mlooking for now-outdated files... [39;49;00mnone found
[01mpickling environment... [39;49;00mdone
[01mchecking consistency... [39;49;00mdone
[01mpreparing documents... [39;49;00mdone
[01mcopying assets... [39;49;00m
[01mcopying static files... [39;49;00m
Writing evaluated template result to /Users/pika/Documents/GitHub/docs-as-code-learning/pipeline-1008/docs/build/html/_static/basic.css
Writing evaluated template result to /Users/pika/Documents/GitHub/docs-as-c

## 🟢 一键执行：run_all()

In [7]:
def run_all():
    global df_csv
    df_csv = extract_word_to_csv(IN_WORD, CSV_OUT)
    csv_to_yaml(CSV_OUT, YAML_OUT)
    yaml_to_rst(YAML_OUT, RST_DIR)
    import shutil, os
    if not os.path.exists('docs'):
        get_ipython().run_cell_magic('bash', '', 'sphinx-quickstart docs --sep --project "AT Command Manual" --author "Doc Team" --release "1.0" -q')
    with open('docs/source/conf.py','a',encoding='utf-8') as f:
        f.write('\nhtml_theme = "sphinx_rtd_theme"\n')
    shutil.copytree('data/rst_output', 'docs/source', dirs_exist_ok=True)
    get_ipython().run_cell_magic('bash', '', 'make -C docs html')
    print('\n✅ 完成：docs/build/html/index.html')
print('准备就绪。逐步运行或直接 run_all()。')

准备就绪。逐步运行或直接 run_all()。
