# 📘 Word → CSV → YAML → RST → Sphinx(HTML/PDF) 全自动流水线 v4
本版本支持 **XML 层解析嵌套表格**，确保参数取值不会丢失；并在 RST 中渲染为内嵌表格。
将 `at-parameter-demo.docx` 放在与本 Notebook 同目录。

In [None]:
# Step 0 — Install dependencies (first run only)
!pip install -q python-docx pandas pyyaml jinja2 sphinx sphinx_rtd_theme lxml

## Step 1 — Word → CSV（支持 XML 层嵌套表格解析）

In [None]:
import os, re, json
import pandas as pd
from docx import Document
from docx.oxml.text.paragraph import CT_P
from docx.oxml.table import CT_Tbl
from lxml import etree

# Input Word document, given as a path relative to the working directory.
IN_WORD = 'at-parameter-demo.docx'
# Output directory; the CSV path below is built from it.
CSV_DIR = 'data'
CSV_OUT = os.path.join(CSV_DIR, 'extracted_commands.csv')
# Create the output directory up front; a no-op when it already exists.
os.makedirs(CSV_DIR, exist_ok=True)

def iter_ordered_blocks(doc):
    """Yield the body's top-level paragraphs and tables in document order.

    Args:
        doc: A python-docx ``Document``.

    Yields:
        ``('p', text)`` for each paragraph, where ``text`` is the stripped
        concatenation of its ``w:t`` runs, and ``('tbl', table)`` for each
        table, where ``table`` is the matching entry of ``doc.tables``.
    """
    # ``doc.tables`` builds a new list on every access, so fetch it once
    # instead of once per table encountered.
    tables = doc.tables
    tbl_idx = 0
    for child in doc._element.body.iterchildren():
        if isinstance(child, CT_P):
            text = ''.join(t.text for t in child.xpath('.//w:t') if t.text).strip()
            yield ('p', text)
        elif isinstance(child, CT_Tbl):
            # Tables are matched to body children purely by running index.
            yield ('tbl', tables[tbl_idx])
            tbl_idx += 1

def cell_plain_text(cell):
    """Return the cell's non-blank paragraph texts, stripped and newline-joined.

    Args:
        cell: Any object exposing ``paragraphs``, each item having ``text``.

    Returns:
        One line per paragraph that still has content after stripping;
        an empty string when there is none.
    """
    lines = []
    for para in cell.paragraphs:
        content = para.text
        if not content:
            continue
        stripped = content.strip()
        if stripped:
            lines.append(stripped)
    return '\n'.join(lines).strip()

def cell_value_map_from_nested_table(cell):
    """Build a key -> value mapping from the tables nested inside a table cell.

    Every ``w:tbl`` found anywhere below the cell is scanned. For each of its
    rows that has at least two cells, the text of the first cell becomes the
    key and the text of the second cell the value. Rows whose key is empty
    are ignored; a repeated key keeps the last value seen.

    Args:
        cell: A python-docx table cell (its ``_tc`` lxml element is read).

    Returns:
        dict mapping key text to value text; empty when the cell contains no
        nested table or no usable row.
    """
    ns = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
    tbl_tag = '{%s}tbl' % ns['w']
    tr_tag = '{%s}tr' % ns['w']

    def text_of(tc):
        # All text runs below the cell, concatenated without a separator.
        return ''.join(t.text for t in tc.iterfind('.//w:t', ns) if t.text).strip()

    mapping = {}
    # ``cell._tc`` is already a parsed lxml element, so it is searched
    # directly instead of being serialised and parsed again.
    for tbl in cell._tc.iterfind('.//w:tbl', ns):
        for row in tbl.iterfind('.//w:tr', ns):
            # Only rows owned by this table: rows of a deeper table are
            # handled when that table itself is visited.
            if next(row.iterancestors(tbl_tag), None) is not tbl:
                continue
            # Only cells owned by this row, so that a table nested in the
            # first cell cannot shift which cell is treated as the value.
            cells = [
                tc for tc in row.iterfind('.//w:tc', ns)
                if next(tc.iterancestors(tr_tag), None) is row
            ]
            if len(cells) < 2:
                continue
            key = text_of(cells[0])
            if key:
                mapping[key] = text_of(cells[1])
    return mapping

def parse_enum_map_advanced(text):
    """Parse ``key: value`` pairs out of free text.

    The text is split into pieces on ASCII/full-width commas, ASCII/full-width
    semicolons and newlines. A piece containing an ASCII or full-width colon
    is split at its first colon into key and value; pieces without a colon or
    with an empty key are dropped. A repeated key keeps the last value.

    Args:
        text: Source text; ``None`` or an empty string yields ``{}``.

    Returns:
        dict mapping stripped keys to stripped values.
    """
    if not text:
        return {}
    result = {}
    for piece in re.split(r'[，,;；\n]+', text.strip()):
        # ``maxsplit`` is passed by keyword: passing it positionally is
        # deprecated since Python 3.13.
        parts = re.split(r'[:：]', piece, maxsplit=1)
        if len(parts) != 2:
            continue
        key, value = parts[0].strip(), parts[1].strip()
        if key:
            result[key] = value
    return result

def _parse_param_table(table):
    """Convert one parameter table into ``{'name', 'desc', 'values'}`` dicts.

    Column 0 is the parameter name and column 1 its description. The value
    map is taken from column 2 (nested table first, then ``key: value`` text)
    and, when that yields nothing, from column 1 in the same way. Rows with
    no text at all and the header row are skipped.
    """
    params = []
    for row in table.rows:
        cols = row.cells
        if not any(c.text.strip() for c in cols):
            continue
        name = cell_plain_text(cols[0]) if len(cols) > 0 else ''
        desc = cell_plain_text(cols[1]) if len(cols) > 1 else ''

        # Header row: decided before any value parsing is attempted.
        if name in ('参数', '参数名', 'Name') and ('描述' in desc or 'Description' in desc):
            continue

        values = {}
        if len(cols) > 2:
            values = (cell_value_map_from_nested_table(cols[2])
                      or parse_enum_map_advanced(cell_plain_text(cols[2])))
        # Fall back to the description column only when it exists; a
        # single-column table must not raise IndexError here.
        if not values and len(cols) > 1:
            values = (cell_value_map_from_nested_table(cols[1])
                      or parse_enum_map_advanced(desc))

        params.append({'name': name, 'desc': desc, 'values': values})
    return params


def extract_word_to_csv(docx_path, csv_out):
    """Extract AT command parameter tables from a Word file into a CSV.

    The body is walked in document order. A paragraph of the form
    ``AT...: title`` starts a command; a following paragraph that is exactly
    ``参数`` arms the parser; the next table is then read as that command's
    parameter table. After that table the command is closed, so any later
    table is ignored until a new command line and ``参数`` marker appear.

    Args:
        docx_path: Path of the ``.docx`` file to read.
        csv_out: Path of the CSV to write (UTF-8 with BOM, no index column).

    Returns:
        pandas.DataFrame with one row per extracted command; the parameters
        are stored as a JSON string in the ``参数JSON`` column.
    """
    doc = Document(docx_path)
    cmd_line_pat = re.compile(r'^\s*(AT\S*?)\s*[:：]\s*(.*)$')
    # Declared explicitly so the CSV keeps its header row even when no
    # command was extracted; a header-less file cannot be read back.
    columns = ['命令', '命令标题', '命令类型', '命令格式', '示例命令',
               '示例响应', '功能描述', '备注', '参数JSON']
    results = []
    current_cmd = None
    current_title = ''
    wait_param_tbl = False

    for typ, obj in iter_ordered_blocks(doc):
        if typ == 'p':
            m = cmd_line_pat.match(obj)
            if m:
                current_cmd = m.group(1)
                current_title = (m.group(2) or '').strip()
                wait_param_tbl = False
            elif obj.strip() == '参数' and current_cmd:
                wait_param_tbl = True

        elif typ == 'tbl' and current_cmd and wait_param_tbl:
            params = _parse_param_table(obj)
            if params:
                results.append({
                    '命令': current_cmd,
                    '命令标题': current_title,
                    '命令类型': '执行;查询',
                    '命令格式': current_cmd,
                    '示例命令': current_cmd,
                    '示例响应': '',
                    '功能描述': current_title,
                    '备注': '',
                    '参数JSON': json.dumps(params, ensure_ascii=False)
                })
            # The table has been consumed: close the current command.
            wait_param_tbl = False
            current_cmd = None

    df = pd.DataFrame(results, columns=columns)
    df.to_csv(csv_out, index=False, encoding='utf-8-sig')
    print(f'✅ 提取 {len(df)} 条命令 → {csv_out}')
    return df

# Run Step 1 and keep the resulting DataFrame. As the last expression of a
# notebook cell, the head() call previews the first rows.
df_csv = extract_word_to_csv(IN_WORD, CSV_OUT)
df_csv.head()

## Step 2 — CSV → YAML

In [None]:
import yaml, json
# YAML file written by Step 2, placed in the same directory as the CSV.
YAML_OUT = os.path.join(CSV_DIR, 'all_commands.yaml')

def csv_to_yaml(csv_path, yaml_path):
    """Convert the extracted-commands CSV into a YAML document.

    Each CSV row becomes one entry of the top-level ``commands`` list.
    ``命令类型`` is split on ``;``, ``命令格式`` on ``|``, and ``示例命令`` /
    ``示例响应`` are split on ``|`` and paired by position. ``参数JSON`` is
    decoded from JSON into the ``parameters`` list.

    Args:
        csv_path: CSV file to read.
        yaml_path: YAML file to write (UTF-8, keys kept in insertion order).
    """
    from itertools import zip_longest

    try:
        # keep_default_na=False: cells such as "NA" or "null" are real text
        # here and must not be turned into missing values.
        df = pd.read_csv(csv_path, dtype=str, keep_default_na=False,
                         encoding='utf-8-sig').fillna('')
    except pd.errors.EmptyDataError:
        # A file without a header row simply contains no commands.
        df = pd.DataFrame()

    cmd_objects = []
    for _, r in df.iterrows():
        params = json.loads(r['参数JSON']) if r['参数JSON'] else []
        example_cmds = (r['示例命令'] or '').split('|')
        example_resps = (r['示例响应'] or '').split('|')
        cmd_objects.append({
            'command': r['命令'],
            'title': r['命令标题'],
            'type': [t.strip() for t in r['命令类型'].split(';') if t.strip()],
            'formats': [f.strip() for f in r['命令格式'].split('|') if f.strip()] or [r['命令格式']],
            'parameters': params,
            # zip_longest keeps every command/response even when the two
            # lists differ in length; the missing side becomes ''.
            'examples': [
                {'cmd': c.strip(), 'resp': e.strip()}
                for c, e in zip_longest(example_cmds, example_resps, fillvalue='')
                if c.strip() or e.strip()
            ],
            'description': r.get('功能描述', ''),
            'notes': r.get('备注', '')
        })
    with open(yaml_path, 'w', encoding='utf-8') as f:
        yaml.safe_dump({'commands': cmd_objects}, f, allow_unicode=True, sort_keys=False)
    print(f'✅ 已生成 YAML → {yaml_path}')

# Run Step 2: read CSV_OUT and write YAML_OUT.
csv_to_yaml(CSV_OUT, YAML_OUT)

## Step 3 — YAML → RST（渲染嵌套 values 表格）

In [None]:
from jinja2 import Template
import yaml, os
# Directory that receives one .rst file per command plus index.rst.
RST_DIR = os.path.join('data', 'rst_output')
# Create it up front; a no-op when it already exists.
os.makedirs(RST_DIR, exist_ok=True)

# Jinja2 template for one command page, rendered with a ``cmd`` dict.
#
# * ``trim_blocks=True`` removes the newline after each block tag, so a tag
#   on its own line produces no output and the blank lines written in the
#   template are exactly the blank lines in the result.
# * The parameter's value map is read as ``p['values']``. On a dict,
#   ``p.values`` resolves to the ``dict.values`` method, which is never a
#   mapping, so every parameter would be rendered as "N/A".
# * ``Formats::`` is followed by a blank line so the indented lines form a
#   literal block, and each ``code-block`` ends with a blank line so that
#   consecutive examples are separate directives.
# * Continuation lines of a multi-line description are indented to stay
#   inside their list-table cell.
RST_TMPL = Template('''
{{ cmd.command }}
{{ '=' * cmd.command|length }}

**Title**: {{ cmd.title }}

**Types**: {{ cmd.type|join(', ') }}

Formats::

{% for f in cmd.formats %}
   {{ f }}
{% endfor %}

Parameters
----------

.. list-table::
   :header-rows: 1
   :widths: 18 34 48

   * - Name
     - Description
     - Values
{% for p in cmd.parameters %}
{% set vals = p['values'] %}
   * - {{ p.name }}
     - {{ (p.desc or '—')|string|indent(7) }}
{% if vals is mapping and vals|length > 0 %}
     - .. list-table::
          :header-rows: 1
          :widths: 20 40

          * - Key
            - Value
{% for k, v in vals.items() %}
          * - {{ k }}
            - {{ v }}
{% endfor %}

{% else %}
     - N/A
{% endif %}
{% endfor %}

Examples
--------

{% for ex in cmd.examples %}
.. code-block:: none

   {{ ex.cmd }}
   {{ ex.resp }}

{% endfor %}
**Description**: {{ cmd.description or '' }}
{% if cmd.notes %}

**Notes**: {{ cmd.notes }}
{% endif %}
''', trim_blocks=True)

def yaml_to_rst(yaml_path, rst_dir):
    """Render every command in the YAML file to its own RST page.

    One ``<command>.rst`` file is written per entry of the top-level
    ``commands`` list, followed by an ``index.rst`` whose toctree lists the
    pages in the same order.

    Args:
        yaml_path: YAML file with a top-level ``commands`` list.
        rst_dir: Output directory; created when missing.
    """
    import re

    with open(yaml_path, 'r', encoding='utf-8') as f:
        # An empty YAML file loads as None; treat it as "no commands".
        data = yaml.safe_load(f) or {}
    cmds = data.get('commands') or []
    os.makedirs(rst_dir, exist_ok=True)

    docnames = []
    for cmd in cmds:
        # Path separators, whitespace and characters that are not allowed in
        # file names on Windows are replaced. The same name is used for the
        # file and for the toctree entry so the two always match.
        docname = re.sub(r'[\\/:*?"<>|\s]+', '_', str(cmd['command']))
        docnames.append(docname)
        rst_text = RST_TMPL.render(cmd=cmd)
        with open(os.path.join(rst_dir, f"{docname}.rst"), 'w', encoding='utf-8') as fo:
            fo.write(rst_text)

    index_lines = ['AT Manual', '=========', '', '.. toctree::', '   :maxdepth: 1', '']
    index_lines.extend(f"   {docname}" for docname in docnames)
    with open(os.path.join(rst_dir, 'index.rst'), 'w', encoding='utf-8') as fo:
        # End the file with a newline.
        fo.write('\n'.join(index_lines) + '\n')
    print(f'✅ RST 已生成到 {rst_dir}')

# Run Step 3: render YAML_OUT into RST files under RST_DIR.
yaml_to_rst(YAML_OUT, RST_DIR)

## Step 4 — 初始化 Sphinx（存在则跳过）

In [None]:
import shutil, os
DOCS_DIR = 'docs'
if not os.path.exists(DOCS_DIR):
    !sphinx-quickstart {DOCS_DIR} --sep --project 'AT Command Manual' --author 'Doc Team' --release '1.0' -q
else:
    print('⚠️ docs/ 已存在，跳过 sphinx-quickstart 初始化。')

conf_py = os.path.join(DOCS_DIR, 'source', 'conf.py')
if os.path.exists(conf_py):
    with open(conf_py, 'a', encoding='utf-8') as f:
        f.write('\nhtml_theme = "sphinx_rtd_theme"\n')

shutil.copytree('data/rst_output', os.path.join(DOCS_DIR, 'source'), dirs_exist_ok=True)
print('✅ RST 已复制到 docs/source/')

## Step 5 — 构建 HTML

In [None]:
# Build the HTML site by running make in docs/ (IPython shell escape).
!make -C docs html
print('\n✅ 构建完成，打开：docs/build/html/index.html')

## 🟢 一键执行：run_all()

In [None]:
def run_all():
    """Run the whole pipeline: Word -> CSV -> YAML -> RST -> Sphinx HTML.

    Must be called from an IPython/Jupyter session, because the shell
    commands are issued through ``get_ipython()``. The extracted DataFrame
    is stored in the module-level ``df_csv``.
    """
    global df_csv
    df_csv = extract_word_to_csv(IN_WORD, CSV_OUT)
    csv_to_yaml(CSV_OUT, YAML_OUT)
    yaml_to_rst(YAML_OUT, RST_DIR)
    import shutil, os
    if not os.path.exists('docs'):
        get_ipython().run_cell_magic('bash', '', 'sphinx-quickstart docs --sep --project "AT Command Manual" --author "Doc Team" --release "1.0" -q')

    conf_py = 'docs/source/conf.py'
    theme_line = 'html_theme = "sphinx_rtd_theme"'
    existing = ''
    if os.path.exists(conf_py):
        with open(conf_py, 'r', encoding='utf-8') as f:
            existing = f.read()
    # Append the theme override only once, so repeated runs do not keep
    # adding duplicate lines to conf.py.
    if theme_line not in existing:
        with open(conf_py, 'a', encoding='utf-8') as f:
            f.write('\n' + theme_line + '\n')

    shutil.copytree('data/rst_output', 'docs/source', dirs_exist_ok=True)
    get_ipython().run_cell_magic('bash', '', 'make -C docs html')
    print('\n✅ 完成：docs/build/html/index.html')
print('准备就绪。逐步运行或直接 run_all()。')