From 22172a67095fbbc56626f917a0dac9f53df6ea5f Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 13:35:52 +0800 Subject: [PATCH 1/6] feat: convert macdoc into a Claude Code plugin marketplace (#112) - .claude-plugin/marketplace.json: 4 plugin entries (name: macdoc) - plugins/che-word-mcp, plugins/macdoc: verbatim copies from psychquant-claude-plugins (cp -R + diff -r byte-identical audit) - plugins/che-pdf-mcp, plugins/che-pptx-mcp: new binary-backed shells (wrapper auto-download from each repo's v0.1.0 signed release) - README.md / CLAUDE.md: marketplace install guide + dual-identity note Refs #112 --- .claude-plugin/marketplace.json | 51 ++ CLAUDE.md | 2 + README.md | 20 + .../che-pdf-mcp/.claude-plugin/plugin.json | 8 + plugins/che-pdf-mcp/.mcp.json | 7 + plugins/che-pdf-mcp/CHANGELOG.md | 12 + plugins/che-pdf-mcp/README.md | 16 + .../che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh | 89 ++++ .../che-pdf-mcp/skills/che-pdf-mcp/SKILL.md | 296 ++++++++++++ .../che-pptx-mcp/.claude-plugin/plugin.json | 8 + plugins/che-pptx-mcp/.mcp.json | 7 + plugins/che-pptx-mcp/CHANGELOG.md | 12 + plugins/che-pptx-mcp/README.md | 16 + .../che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh | 89 ++++ .../che-word-mcp/.claude-plugin/plugin.json | 17 + plugins/che-word-mcp/.mcp.json | 7 + plugins/che-word-mcp/CHANGELOG.md | 236 ++++++++++ plugins/che-word-mcp/CLAUDE.md | 107 +++++ plugins/che-word-mcp/README.md | 442 ++++++++++++++++++ .../che-word-mcp/bin/che-word-mcp-wrapper.sh | 92 ++++ .../che-word-mcp/skills/che-word-mcp/SKILL.md | 261 +++++++++++ plugins/macdoc/.claude-plugin/plugin.json | 8 + plugins/macdoc/CHANGELOG.md | 17 + plugins/macdoc/skills/macdoc/SKILL.md | 332 +++++++++++++ 24 files changed, 2152 insertions(+) create mode 100644 .claude-plugin/marketplace.json create mode 100644 plugins/che-pdf-mcp/.claude-plugin/plugin.json create mode 100644 plugins/che-pdf-mcp/.mcp.json create mode 100644 plugins/che-pdf-mcp/CHANGELOG.md create mode 100644 plugins/che-pdf-mcp/README.md 
create mode 100755 plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh create mode 100644 plugins/che-pdf-mcp/skills/che-pdf-mcp/SKILL.md create mode 100644 plugins/che-pptx-mcp/.claude-plugin/plugin.json create mode 100644 plugins/che-pptx-mcp/.mcp.json create mode 100644 plugins/che-pptx-mcp/CHANGELOG.md create mode 100644 plugins/che-pptx-mcp/README.md create mode 100755 plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh create mode 100644 plugins/che-word-mcp/.claude-plugin/plugin.json create mode 100644 plugins/che-word-mcp/.mcp.json create mode 100644 plugins/che-word-mcp/CHANGELOG.md create mode 100644 plugins/che-word-mcp/CLAUDE.md create mode 100644 plugins/che-word-mcp/README.md create mode 100755 plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh create mode 100644 plugins/che-word-mcp/skills/che-word-mcp/SKILL.md create mode 100644 plugins/macdoc/.claude-plugin/plugin.json create mode 100644 plugins/macdoc/CHANGELOG.md create mode 100644 plugins/macdoc/skills/macdoc/SKILL.md diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json new file mode 100644 index 0000000..8ac4b8d --- /dev/null +++ b/.claude-plugin/marketplace.json @@ -0,0 +1,51 @@ +{ + "$schema": "https://anthropic.com/claude-code/marketplace.schema.json", + "name": "macdoc", + "description": "macdoc 文件處理生態系 Claude Code Plugin Marketplace — Word / PDF / PowerPoint MCP servers + macdoc CLI", + "owner": { + "name": "Che Cheng" + }, + "homepage": "https://github.com/PsychQuant/macdoc", + "plugins": [ + { + "name": "che-word-mcp", + "version": "3.20.0", + "description": "v3.20.0 cross-document OMath splice MCP tools (#160) — splice_omath_from_source + splice_paragraph_omath_from_source wrap ooxml-swift v0.24.0's spliceOMath API for verbatim copy of XML blocks between WordDocument paragraphs. Source via source_path (Direct mode read-only) or source_doc_id (Session mode); target requires session-mode doc_id. 
Position via atStart/atEnd/afterText/beforeText with optional anchor + instance. Carrier preservation (Run.rawXML stays inline, unrecognizedChildren stays direct-child); joint document-order index for omath_index across both source carriers. rpr_mode controls source Run rPr propagation (full default verbatim / omathOnly whitelist / discard empty); namespace_policy controls prefix vs URI handling (lenient default accepts mml: vs m: prefix mismatch with same URI per ECMA-376 / strict throws on any mismatch). Error taxonomy returned as structured strings: sourceHasNoOMath / omathIndexOutOfRange / targetParagraphOutOfRange / anchorNotFound / namespaceMismatch / contextAnchorNotFound (batch only). Unblocks kiki830621/collaboration_guo_analysis Phase 7 inline-math restoration pipeline. Tests: 8 new Issue160SpliceOMathFromSourceTests (Direct/Session source modes, atEnd/afterText positions, error taxonomy, batch mode, rPr discard). Full suite: 297 passing, 0 failures, 9 pre-existing skips. Bumps ooxml-swift dep 0.21.0 → 0.24.0. v3.19.0 caption detection (#136) + estimate_paragraph_for_page structural weights v2 (#142). #136: two-layer detection — Paragraph.style primary + expanded prefix set (English Tab./Fig./Listing + CJK 表3/圖1 no-separator + U+3000 ideographic space) with digit-after-prefix guard rejecting body sentences like \"Table reservations are required...\". 17 new tests. #142: walker upgrade getParagraphs → collectStructuralBlocks enum (.paragraph/.table/.imageOnlyParagraph/.displayEquationParagraph). Per-block weights (12pt thesis calibration): table = tableRows × avgCellChars (200/row fallback), image = +200/drawing, display eq = 120 chars. New structural_breakdown metadata (9 sub-fields). API method bumped char_count_heuristic → char_count_heuristic_v2. ~95× thesis figure-counting accuracy improvement. paragraph_index semantics + getParagraphs() preserved (30+ other callers unaffected). 
Tests: 6 new + #89 backward-compat passes (text-only fixtures unchanged, just method literal updated). 1 P3 follow-up filed (#159 display-eq fixture limitation). v3.18.1 transitive dependency bump — ooxml-swift v0.21.11 → v0.22.1. Closes PsychQuant/che-word-mcp#155 (parent / mirror) auto-resolves via Package.resolved bump alone (no MCP source change). Pre-fix: che-word-mcp__search_text MCP tool's Server.swift:10310 calls para.getText(), which was a legacy 7-line implementation that only joined runs.map { $0.text } + hyperlink.text — missed every walker enhancement landed in flattenedDisplayText() over the #85 / #92 / #99 / #100 / #101 / #102 / #103 cluster. Callers couldn't grep for inline math symbols (α / β / γ / θ / λ / t) — silent zero gaps in match positions. Post-fix: ooxml-swift#43 collapsed Paragraph.getText() to single-line return flattenedDisplayText(); two text-extraction paths now return identical strings. Server.swift:10310 still calls para.getText() but that method now traverses inline OMML correctly. Position arithmetic now matches before_text / after_text anchor-matching paths. Downstream impact: kiki830621/collaboration_guo_analysis#6 (thesis 30 inline math symbols) unblocked from automation — re-running search_text 進行t檢定 against 碩士論文.docx para 324 now returns a match. Released via /idd-all #43 --cwd cross-repo IDD orchestration (issue-driven-dev v2.40.0). Closing summary at https://github.com/PsychQuant/ooxml-swift/issues/43#issuecomment-4365430488. v3.18.0 insert_equation argument-contract hardening (closes #105 #106 #107). v3.17.8 closes PsychQuant/che-word-mcp#98 — insert_equation MCP handler refactored across 3 commits (91506f8/357cbe7/339ab77). 
Pre-fix three structural defects: (1) silent-clamp on out-of-range paragraph_index via non-throwing insertParagraph(_:at: Int) — tool reported success but inserted at wrong location; (2) lib's #84/#91 InsertLocation overload + InsertLocationError.inlineModeRequiresParagraphIndex / .invalidParagraphIndex(Int) structured errors never reached MCP callers (handler self-built OMML and bypassed lib); (3) inline mode (display_mode=false) always created a NEW Paragraph(runs: [eqRun]) instead of appending OMML run to the EXISTING paragraph at paragraph_index. v1 (91506f8) tried delegating to lib but Codex 6-AI verify caught P1 regression: lib's Document.insertEquation overload internally uses @available(*, deprecated, ...) MathEquation flat output (Field.swift:301) — emits truncated '(a)/(b' AND nested invalid OOXML. v2 (357cbe7) unified handler — both latex AND components paths use MathComponent.toOMML() for structurally correct OMML; display mode routes through lib's throwing insertParagraph(_:at: InsertLocation); inline mode handler-side appends OMML run to existing paragraph. v3 (339ab77) restored eqPara.properties.alignment = .center for display mode. BREAKING for inline mode callers: pre-fix inserted NEW paragraph; post-fix appends OMML run to EXISTING paragraph at paragraph_index. Migration: use display_mode=true for 'new paragraph with equation' or call insert_paragraph + insert_equation separately. 7 NEW Issue98InsertEquationLibBypassTests pin contracts (5 RED→GREEN scenarios + 2 quality regression tests including unzip -p document.xml verifying structure + centering survives + deprecated '(a)/(b)' pattern absent). 6-AI verify ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) caught both P1 regressions in v1; Codex sanity check on v2 caught the centering P2. 6 P2/P3 follow-up issues filed (#105-#110). Suite: 236 → 243 tests, 0 failures, 9 pre-existing skips. Backward compatible except inline-mode BREAKING (documented). 
v3.17.7 ooxml-swift dep bump 0.21.10 → 0.21.11 — closes 5-issue cluster PsychQuant/che-word-mcp #99 + #100 + #101 + #102 + #103. Bilateral mirror coverage for direct-child OMML at 4 wrapper positions ( direct child for Pandoc display math / direct child / direct child / nested wrapper combos), plus 2 NEW library-wide spec capabilities. Pre-fix Paragraph.flattenedDisplayText AND Document.replaceInParagraphSurfaces shared a symmetric blind spot: direct-child / (not wrapped in ) was silently dropped → anchor lookups against paragraphs containing display math silently 0-matched. Spectra change flatten-replace-omml-bilateral-coverage. 2 NEW spec capabilities promoted to openspec/specs/: ooxml-paragraph-text-mirror (mirror invariant + ReplaceResult informative refusal contract) + ooxml-library-design-principles (Correctness primacy + Human-like operations as foundational normative invariants for all ooxml-swift mutators). Read side: flattenedDisplayText walks direct-child OMML at all 4 wrapper positions with source-XML position ordering. Write side: NEW public API WordDocument.replaceTextWithBoundaryDetection returns ReplaceResult enum (.replaced(count:) / .refusedDueToOMMLBoundary(occurrences:) / .mixed(replacedCount:, refusedOccurrences:)) with Occurrence(matchSpan:, ommlSpans:) carrying flattened-text coordinates. Mirror invariant — asymmetric by design: reads include OMML visibleText (anchor lookup universe extends to math); writes treat OMML as opaque structural units (refuse cross-OMML mutation rather than silently delete equations). Decision 4 raw passthrough preserved: direct-child OMML stays in Paragraph.unrecognizedChildren / HyperlinkChild.rawXML(_) / AlternateContent.rawXML — no parser change, no writer change, round-trip fidelity unaffected. MCP impact: replace_text and other anchor-lookup tools now find paragraphs containing direct-child OMML at all 4 wrapper positions; existing replace_text MCP tool unchanged (backward-compatible). 
Tests: 236 passing che-word-mcp / 813→829 ooxml-swift (+16 in Issue99FlattenReplaceOMMLBilateralTests). Backward compatible — strict superset of pre-fix behavior. v3.17.6 ooxml-swift dep bump 0.21.9 → 0.21.10 — closes PsychQuant/che-word-mcp#104. Form-level FieldParser canonical 5-run fldChar fix (orthogonal to v3.17.5's #94 container-level fix). Pre-fix update_all_fields returned silent no-op ('no SEQ fields found') on docs containing valid SEQ paragraphs at body top level when fldChar block was emitted in canonical 5-run form (each // in its own sibling — what DocxReader produces post-roundtrip and what native Word always emits). Pre-fix worked only on in-memory wrap_caption_seq output before save. Two ooxml-swift commits land via this dep bump: 537de62 FieldParser two-phase parse (Phase-1 baked form + Phase-2 parseFiveRunSpan state machine probing both Run.rawXML and Run.rawElements per recognizedRunChildren = ['rPr','t','drawing','oMath','oMathPara'] allowlist) + 58fe4f9 P1 sub-fix surfaced by 6-AI verify (Logic + Devil's Advocate runtime test): canonical-branch Run.text rewrite was silently overridden by Run.toXML() rawXML short-circuit; new rewriteCanonicalCachedText helper splices new value into embedded while preserving + xml:space=preserve, AND keeps Run.text in sync. MCP impact: update_all_fields now finds and updates SEQ fields in canonical 5-run form (post-roundtrip / native Word emission); list_captions benefits transitively via shared FieldParser. Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) — 4 PASS / 2 WARN / 0 BLOCK; production reproducer rescue-swift-v317.docx via DocxReader path confirmed working. 5 P3 follow-ups filed in ooxml-swift (#29 SEQ Table coverage / #30 multi-paragraph counter / #31 multi-SEQ same paragraph / #32 DoS hardening / #33 discriminator invariant). Tests: 236 passing che-word-mcp / 809→813 ooxml-swift (+4 sub-tests in Issue104FieldParserCanonicalFormTests). 
Backward compatible — strict superset of pre-fix behavior. v3.17.5 ooxml-swift dep bump 0.21.8 → 0.21.9 — triple #87 + #93 + #94 release. #87 (Comment.paragraphIndex flat-counter, observable behavior change): list_comments paragraph_index now consistently 0-indexed against get_paragraphs() flat list; pre-fix off-by-N for any docx with non-paragraph BodyChild siblings before commented paragraph; callers manually compensating with paragraph_index - 1 must remove compensation. #93 (wrap_caption_seq SEQ inherits source position, caption visual fix): pre-fix 「圖 4-1:xxx」 became 「圖 4-:xxx1」 because new SEQ run had position=nil while source-loaded preText/postText had position>0; one-line fix seqRun.position = preRun.position; insert_bookmark=true × source-loaded paragraph still has same gap (filed PsychQuant/ooxml-swift#24, default insert_bookmark=false unaffected). #94 (update_all_fields traverses .table and .contentControl containers): pre-fix body loop only processed top-level .paragraph BodyChild, silently skipped .table and .contentControl(_, children:) — SEQ fields inside table cells/block-level SDTs never updated, returning 'no SEQ fields found' for thesis docs (caption paragraphs commonly live inside the table they describe); same gap #68 closed for findBodyChildContainingText; new walkAndProcessBodyChildForFields recursive walker mirrors #68 pattern; heading-count semantics: only top-level direct .paragraph body children count toward chapter-reset. Known incompleteness (3 follow-ups filed): ooxml-swift#25 header/footer/footnote/endnote SEQ scans still flat .paragraphs view; ooxml-swift#26 FieldParser.parse(paragraph:) misses inline SDT/hyperlink/fieldSimple/alternateContent surfaces; ooxml-swift#27 verify-with-user-fixture for real thesis docx roundtrip; plus ooxml-swift#28 refactor candidate (extract BodyChildVisitor protocol). Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh). Tests: 236 passing che-word-mcp / 805→809 ooxml-swift. 
Backward compatible except #87 documented behavior change. v3.17.4 paired #91 + #92 release. v3.17.3 bump ooxml-swift dep 0.21.6 → 0.21.7 (pure transitive bump, no MCP source changes; exposes public anchor lookup API). Three WordDocument APIs upgraded private → public (PsychQuant/che-word-mcp#86): findBodyChildContainingText(_:nthInstance:) instance method + bodyChildContainsText(_:needle:) static + tableContainsText(_:needle:) static. External Swift SPM consumers (rescue scripts, dxedit CLI, third-party tooling) can now call canonical anchor-lookup logic directly instead of reimplementing with diverging semantics (some skipped .contentControl recursion, some skipped table cell traversal pre-#68, some used different nthInstance counting rules). Result is exactly what insert_paragraph / insert_image_from_path / insert_caption etc. tools see when resolving after_text / before_text anchors. 10 new public-API surface tests in Issue86PublicAnchorLookupTests pin the canonical behavior across releases. Backward compatible: pure additive private → public visibility change; no API removals, no behavioral changes for existing callers. v3.17.2 bump ooxml-swift dep 0.21.2 → 0.21.6 (pure transitive bump, no MCP source changes; 4 ooxml-swift releases worth of hardening + new APIs land transparently). v0.21.3 (XML hardening, PsychQuant/ooxml-swift#7): DTD reject + 64KB attr-value cap + SAX-based root-element attribute parsing + name whitelist on emit. New XMLHardeningError throws on malicious .docx input. v0.21.4 (roundtrip loud-fail, PsychQuant/ooxml-swift#6): AlternateContent.fallbackRunsModified dirty flag throws RoundtripError.unserializedFallbackEdit on stale fallbackRuns mutation. Run.commentIds @available deprecated; migrate to commentRangeMarkers. v0.21.5 (insertEquation flexibility, PsychQuant/che-word-mcp#84 #85): InsertLocation overload for Document.insertEquation + flattenedDisplayText OMML coverage extends anchor lookup beyond plain runs. 
v0.21.6 (mutation surface, PsychQuant/ooxml-swift#5): Hyperlink.text setter @available deprecated (lossy); migrate to .runs property. Position field cascade Int = 0 → Int? = nil across 13 typed-child models. xml:space=preserve autosense in Run.toXMLThrowing emit. Plus 3 unreleased docs commits on ooxml-swift main (PsychQuant/ooxml-swift#14 #15 #17 + corrective cd841e7) covering needsPPr emit-gate ↔ Issue4 lock-in test bidirectional reference, parseRun vs parseParagraph walker pattern divergence rationale, foreign-namespace pPr asymmetry documentation. Tests: 236 passing (no regressions). Backward compatible: deprecation warnings only; no API removals. v3.17.1 bump ooxml-swift dep to v0.21.2 — pulls in pPr regression-guard + test infrastructure hardening from upstream (ooxml-swift#4 walker whitelist + #if DEBUG assert / ooxml-swift#13 empty self-closing test gap / ooxml-swift#16 countPPrOpenTags regex hardening excluding ). No public MCP tool change. v3.17.0 wrap_caption_seq MCP tool (Refs #62): Phase 2 of cross-repo work — exposes ooxml-swift v0.21.0 lib API as MCP tool. Bulk-wraps plain-text caption number portions in SEQ field runs across body paragraphs whose flattened text matches a regex (EXACTLY ONE numeric capture group). Captured digit becomes SEQ field cachedResult so Word's first-open render preserves user-typed numbering before F9. Rescues docs pasted from external sources (LaTeX-converted Word, Google Docs, Pandoc) so insert_table_of_figures / insert_table_of_tables produce populated TOFs. Idempotent: paragraphs already wrapping a SEQ field for sequence_name reported in skipped, never double-wrapped (detection covers both FieldSimple AND rawXML fldChar emissions). Phase 1 ships scope:body only (recurses into table cells + nestedTables + block-level SDT children); scope:all returns Error: scope_not_implemented for now (cross-container path lands in v3.17.x). 
Bookmark wrap opt-in (insert_bookmark + bookmark_template with literal ${number}) so default 23-caption rescue does NOT pollute list_bookmarks. Returns JSON: {matched_paragraphs, fields_inserted, paragraphs_modified:[idx,...], skipped:[{paragraph_index, reason},...]}. All preconditions checked BEFORE document mutation (regex compile + capture-group count + format/scope enums + bookmark_template invariant + doc_id opened). Tests: 5 new sub-tests in Issue62WrapCaptionSeqTests covering Scenarios 1-5. Suite 231 → 236 (+5, 0 fail / 9 skip). No ooxml-swift dep bump (still v0.21.0 from v3.16.2). v3.16.2 ooxml-swift dep bump 0.20.5 → 0.21.0 (Refs #62 #68): pure dep bump, no MCP source changes. Picks up two ooxml-swift fixes that surface transparently via existing tool dispatch. #68 (ooxml-swift v0.20.6): InsertLocation.findBodyChildContainingText now traverses .table (rows × cells × paragraphs + nestedTables) and .contentControl(_, children:) (recursive). MCP impact — insert_paragraph / insert_image_from_path / insert_equation / insert_caption calls using before_text / after_text now succeed when anchor text lives inside a table cell or block-level SDT (common in thesis docs with figure/table captions inside table cells). Returned position is top-level body.children index of the containing structure. Use into_table_cell for inside-cell inserts. Empty-needle guard: passing before_text:'' / after_text:'' now returns textNotFound instead of silently inserting at index 1. #62 (ooxml-swift v0.21.0): WordDocument.wrapCaptionSequenceFields(...) is now linked into the binary. Not yet exposed as an MCP tool — the wrap_caption_seq MCP wrapper ships in v3.17.0 (Phase 2 of the cross-repo work). Existing MCP tools unaffected. Suite: 231 → 231 (0 fail / 9 skip). v3.16.1 anchorPresence whitelist drift prevention (Refs #80): pure refactor, no runtime behavior change. 
New static toolAnchorWhitelists dict (single source of truth, keyed by MCP tool name → accepted anchor list) + new detectPresentAnchors(_:tool:) overload. 4 conflict-detection call sites switched from literal anchor arrays to (tool:) lookup. 4 new invariant/parity tests. Suite 227 → 231 (+4). Old (args, anchors:) overload preserved. Out-of-scope follow-ups: schema descriptions + dispatcher if-else chains still hardcode anchor names (pre-existing surfaces, not introduced by this PR). v3.16.0 Bundle B anchor DX consistency (Refs #70 #71 #72): BREAKING (input validation only) — three coordinated MCP-layer changes across the 4 #61-target tools. #71 (behavior) silent priority on conflicting anchors → structured error: insert_paragraph(after_text + index) was previously silent-priority; now returns 'Error: insert_paragraph: received conflicting anchors: after_text + index. Specify exactly one.' New static helper detectPresentAnchors with per-anchor type-aware predicates (null and wrong-type values do NOT count). #72 (validation) explicit text_instance ≤ 0 rejected — 'Error: <tool>: text_instance must be ≥ 1, got <N>.' Omitted text_instance still defaults to 1. #70 (DX) all 32 'return Error:' lines in 4 #61-target tools rewritten as 'Error: <tool>: <message>' for AI-caller error attribution. throw WordError.* paths unchanged. Scope deliberately limited to 4 tools; remaining 41 return Error lines elsewhere deferred to error-prefix-sweep follow-up. SemVer rationale (minor not major): no schema break, no tool removal, restricting previously-undefined behavior. Tests: 201 → 227 (+26 sub-tests, 0 fail / 9 skip). No ooxml-swift dep bump (still v0.20.5).
v3.15.3 Bundle A2 polish from v3.15.2 verify R3-R6 follow-ups (Refs #76 #77 #78 #79): #76 (docs) insert_caption description corrected from '三種 anchor' to enumerate all 5 (paragraph_index / after_image_id / after_table_index / after_text / before_text); insert_equation paragraph_index description clarified that the int is body.children-indexed (cross-references PsychQuant/ooxml-swift#10 for the lib-layer convention split). #77 (docs) insert_caption anchor set wording precision in CHANGELOG / manifest / marketplace.json / plugin.json — was 'its own anchor set including after_table_index' (implies disjoint), now 'shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell' (explicit shared/adds/lacks). #78 (test) extends #69 append-index regression pin to bookmarkMarker / rawBlockElement / block-level contentControl body-children — the table case alone wouldn't catch a regression to getParagraphs().count - 1 that breaks for SDT / TOC bookmark / vendor extensions. #79 (test) adds round-trip depth: testInsertParagraphAppendIndexRoundTripsForInsertCalls demonstrates insert-family round-trip works (append + insert(N+1) + verify ordering); testInsertParagraphAppendIndexCannotRoundTripToUpdate pins the cross-family trade-off (update_paragraph(index=N) throws WordError.invalidIndex). Tests: 196 → 201 (+5 sub-tests, 0 fail / 9 skip). No production code change. No ooxml-swift dep bump (still v0.20.5). 
v3.15.2 closes Bundle A polish from #61 R2 verify (Refs #69 #73 #74 #75): #69 (bug) insert_paragraph append message reports body.children index instead of getParagraphs().count - 1 (mis-reported in docs with tables/SDTs by skipping table children); #74 (bug) insert_image_from_path debug log labels after_image_id correctly (was silently labeled 'index' since v3.15.1); #73 (test) regression pin for equation F5 partial-dict guard (existed since v3.15.1 but was untested); #75 (docs) clarifies '3 insert tools' wording — scope is the 3 #61-target tools (insert_paragraph / insert_equation / insert_image_from_path); insert_caption is a 4th insert tool with a partially-overlapping anchor set (shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell), intentionally outside this unification scope. Tests: 194 → 196 (0 fail / 9 skip). No behavior change in normal call paths. No ooxml-swift dep bump (still v0.20.5). Word MCP Server - Swift 原生 OOXML 操作,233 個工具。v3.15.1 closes verify findings F1+F2+F3+F5 from v3.15.0 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh):F1 (P1) `after_image_id` anchor 加到 insert_paragraph + insert_equation (display only) + insert_image_from_path — lib InsertLocation.afterImageId 從 #44 起就 ready 但只有 insert_caption 暴露 MCP-layer;v3.15.0 inherited 這個 gap,本 release 補齊。F2 (P1) `into_table_cell` 加到 insert_equation (display only) — display equation 是新建 paragraph,cell 放置 well-defined;inline mode 拒絕。F3 (P2) equation 成功訊息加 anchor info('Inserted equation (display mode: true, after text X (instance N))' 等)— 關閉同 v3.14.4 LOOKUP 的 over-claim 模式(caller 之前無法區分 anchor 命中 vs append fallthrough)。F5 (P2) malformed `into_table_cell` partial dict(傳 `{table_index: 0}` 缺 row + col)silent fallthrough → 走 next anchor / append → 結果在錯位置且 caller 不知。改回 structured 'Error: into_table_cell requires all three fields',3 #61-target tools 同步修(cross-cutting consistency)。Anchor priority unified across all 3 #61-target 
insert tools (`insert_paragraph` / `insert_equation` / `insert_image_from_path`; `insert_caption` has its own anchor set):into_table_cell > after_image_id > after_text > before_text > index > append。Inline equation 拒絕擴大 — 現在拒絕所有 4 個 anchor params(before/after_text + after_image_id + into_table_cell),不只 v3.15.0 的 2 個。Tests: Issue61V315PointReleaseTests (9 sub-tests cross 3 tools)。Suite 185 → 194 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — 仍 v0.20.5(lib 從 #44 起就 ready)。Follow-up issues 另開:F4 inline equation 更通用設計 (e.g. into_paragraph_with_text) / F6 text anchor 擴及 table-cell paragraphs 與 block-level SDT / F7 getParagraphs().count - 1 message 在 doc 含 tables/SDTs 時 mis-report (pre-existing) / F8 error message 加 tool-prefix / F9 multiple anchor params 同時傳入 silent priority winner / F10 text_instance≤0 normalize。Backward compatible — schema additions optional,既有 v3.15.0 callers 不變;只有 malformed into_table_cell 從 silent fallthrough 改成 structured error(會被 buggy caller 注意到)+ equation message 加 suffix(substring 'Inserted equation' 仍存在)。v3.15.0 closes #61 — insert_paragraph 與 insert_equation 現在接受跟 insert_image_from_path 一致的 anchor 參數(after_text / before_text / text_instance / into_table_cell — into_table_cell 僅 insert_paragraph)。Pre-fix MCP 層 silently drop 這些參數 — JSON schema 接受但 handler dispatch 忽略,呼叫 fall through 到 legacy paragraph_index path 或 append at end。Lib API Document.insertParagraph(_: at: InsertLocation) 從 #44 起就支援所有六種 anchor cases(paragraphIndex / afterImageId / afterTableIndex / intoTableCell / afterText / beforeText),本 release 補齊 MCP 側 wire-up gap,無需 ooxml-swift dep bump(v0.20.5 已足夠)。Anchor priority mirror insert_image_from_path:into_table_cell > after_text > before_text > index > append。Errors(textNotFound / tableIndexOutOfRange / tableCellOutOfRange)回 structured 訊息而非 silent fallthrough — AI caller 能 surface failure 而非拿到位置錯誤的 misleading 'success'。**Inline equation explicit rejection**:insert_equation 在 display_mode=false(inline)時 explicitly 拒絕 
after_text / before_text,回 structured error — 語意模糊('append OMML run into existing para containing this text' vs 'insert new para before/after target para'),inline placement 仍用 paragraph_index。Display-mode equation 建新 paragraph,anchor 語意明確。Tests: Issue61InsertParagraphAnchorsSmokeTests(5 sub-tests:after_text resolution / before_text resolution / text_instance disambiguation / into_table_cell append / textNotFound error)+ Issue61InsertEquationAnchorsSmokeTests(4 sub-tests:after_text + before_text in display mode / inline mode rejection / textNotFound error)。Suite 176 → 185 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — v0.20.5 已有所有需要的 lib API。Backward compatible — anchor params 全 optional;既有 index / paragraph_index callers 不變;無 schema removal、無既有行為改動。**Real-world impact**:thesis-rescue / template-population workflow 不再需要 fall back 到「append at end + 手動 cut/paste in Word UI」或 binary-search 猜 paragraph_index,AI caller 對 3 #61-target insert tools(insert_image_from_path / insert_paragraph / insert_equation)對稱地用 surrounding context 定位 anchor。v3.14.5 closes Refs #63 verify F1 P1:擴充 findBodyChildContainingText 涵蓋所有 editable surfaces,補上 v3.14.4 CHANGELOG over-claim 的 insert anchor lookup gap。Pre-fix v3.14.4 只修了 REPLACE path(replace_text → Document.replaceInParagraphSurfaces 走 contentControls / hyperlinks / fieldSimples / alternateContents)但 LOOKUP path(findBodyChildContainingText 用於 InsertLocation.afterText / .beforeText 解析)只看 para.runs,所以 insert_image_from_path / insert_paragraph / insert_caption before_text/after_text 對 SDT-wrapped anchor 仍丟 textNotFound。Verify ensemble(5 Claude reviewers + Codex)的 requirements F1 P1 finding 抓到 CHANGELOG over-claim — 用戶選擇 Option B 擴充修而非縮 scope。ooxml-swift v0.20.5 新增 TextReplacementEngine.flatTextOfContentXML(read-only XML walker mirror replaceInContentXML flattening rules,跳過 / / nested subtrees)+ Paragraph.flattenedDisplayText 擴充 method 涵蓋 runs + hyperlinks + fieldSimples + alternateContents + contentControls(recursive into 
nested SDT children)。findBodyChildContainingText 改用 flattenedDisplayText 取代原本的 para.runs.map { $0.text }.joined()。新增 Issue63InsertAnchorInlineSDTTests(lib,3 wrappers × afterText/beforeText/insertImage = 3 sub-tests / 5 assertions)+ Issue63InsertAnchorInlineSDTSmokeTests(MCP,2 sub-tests pin lib-layer fix)。Suite 693 → 696 ooxml-swift / 174 → 176 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.5。Backward compatible — strict superset of pre-fix lookup behavior(找到更多 anchors,既有 plain runs anchor 仍照常運作)。**Insert anchor lookup gap 此 release 完整補齊**,所有 inline wrappers 在 REPLACE + LOOKUP 兩個 path 都對稱覆蓋。v3.14.4 修 replace_text 對 inline `` content control 的 wrapper coverage gap(Refs #63):Document.replaceInParagraphSurfaces 之前覆蓋 paragraph.runs / hyperlinks / fieldSimples / alternateContents 但 **沒有** paragraph.contentControls — 包在 inline `` 裡的文字 silently 0-match。外部 converter(pandoc / Quarto / LaTeX→docx)習慣把 cross-ref placeholder([tab:foo] / [fig:bar] / [Smith 2020])包成 inline SDT,所以症狀跟 bracketed text 高度相關,但其實 **brackets 是 coincidence** — bracket-free needle 在 inline SDT 裡也 fail。Issue title「literal `[ ]` brackets」是誤導,差別測試(fldChar / fldSimple / hyperlink / inlineSDT 四個 inline wrapper × 四種 needle)證實只有 inline SDT case 失敗,其他三個 wrapper 從 v0.19.0+ #56 Phase 5 起就 typed-runs 覆蓋好了。Surgical fix architecture:ooxml-swift v0.20.4 新增 TextReplacementEngine.replaceInContentXML(XML DOM walker,wrap ContentControl.content 在 synthetic root xmlns:w,遍歷所有 `` descendants 在 document order,build flat string + offset map mirror flattenRuns invariant,run same literal/regex find logic,splice replacements 回 `` element string content;re-serialize wrapper children 去掉 wrapper tag)+ Document.replaceInContentControl(recursive helper 涵蓋 cc.content + cc.children 處理 nested SDT)。Wired 進 Document.replaceInParagraphSurfaces 接在 alternateContents loop 之後。設計上跳過:``(TC deletion text,不顯示)、``(field instruction code,不顯示)、nested `` subtrees(typed cc.children 由外層 recursion 處理避免 double-replacement)。Round-trip discipline:只 mutate `` element 
的 string content;xml:space=\"preserve\" 與其他 attribute 完整保留(attribute set 從不被 touch)。新增 Issue63InlineSDTReplaceTests(4 個 wrapper × 4 個 needle 的 differential test + nested SDT recursion + round-trip wrapper preservation = 3 sub-tests / 18 assertions)+ MCP-layer Issue63ReplaceTextInlineSDTSmokeTests(2 sub-tests pin lib-layer fix)。Suite 690 → 693 ooxml-swift / 172 → 174 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.4。Backward compatible — surgical fix 只新增 code path,沒改任何既有行為(runs/hyperlinks/fieldSimples/alternateContents replacement path 不動,ContentControl model 維持 raw XML storage 不重構)。Out-of-scope(separate follow-up):ContentControl 從 content:String 升級為 typed Run 列表(SDD-warranted refactor);smartTags / bidiOverrides / customXmlBlocks / unrecognizedChildren 維持 raw-carrier passthrough。v3.14.3 sub-stack E of paragraph-level content-equality (closes #66):Paragraph 新增 w14ParaId / w14TextId 欄位,提取並 round-trip opening tag 上的 w14:* 屬性(Word 用於 collaborative editing 和 comment threading 的 revision-tracking GUIDs)。Plain attribute passthrough,String? 
typing — Word 的 GUIDs 是 8-char hex tokens(NOT RFC 4122 UUIDs),所以 opaque-string round-trip 是正確選擇。Pre-fix v3.14.2 silently dropped 兩個 attributes — 佔了 NTPU 論文 fixture w14:* token loss 的 ~95%(2214 / 2359 lost tokens 是這兩個 attrs)。Post-E 量測:w14: 保留率 10.55% → 93.98%;document.xml 流失 10.95% → 8.02%。Combined with sub-stack D (#65), total impact since v3.14.1: 50% → 98.89% (D)、w14:* 5% → 93.98% (E)、document.xml 流失 16.66% → 8.02% (D+E, -8.64 pp)。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(w14: 0.04→0.90、sizeLossRatio 上限 0.12→0.10)— matrix-pin 現在 LOAD-BEARING across **5 preservation classes**(rFonts/noProof/lang/kern/w14:)spanning run-level + paragraph-level + paragraph-mark scope。Defensive design (R2 review fixes):openingPTag() routes attributes through escapeXMLAttribute;parseParagraph rejects schema-invalid empty-string GUIDs。基於 ooxml-swift v0.20.3。Backward compatible(兩個 fields 都 optional、default nil;openingPTag empty-attrs gate 防止 synthetic emit)。剩餘 8% 流失主要是其他 w14:* attribute classes(如 w14:* on )— tracked as separate follow-up SDD。v3.14.2 sub-stack D of paragraph-level content-equality (closes #65):ParagraphProperties 新增 markRunProperties 欄位,提取並 round-trip direct child of — paragraph-mark formatting per ECMA-376 §17.3.1.27 CT_PPrBase(控制 pilcrow ¶ 字符外觀的字型/顏色/語言/字距)。Reuses parseRunProperties verbatim — schema 跟 run-level CT_RPr 一致,所以 sub-stack C 的 typed extraction(rFonts 4-axis / noProof / kern / lang 3-axis)和 rawChildren passthrough(w14:* 效果)全部免費繼承。NTPU 論文 fixture 量測影響: 保留率 50% → 98.89%; 88% → 98.77%; 92% → 100%; 84% → 99.93%;document.xml 大小流失 16.66% → 10.95%。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(lang 0.45→0.95、rFonts/noProof/kern 0.95、sizeLossRatio 上限 0.175→0.12)。Sub-stack E (#66 w14:paraId/textId) 接著 ship 到 v3.14.3,把流失壓到 < 5%,達成「edit 一個字 → document.xml shrinks <1%」strong demo。基於 ooxml-swift v0.20.2。Backward compatible(markRunProperties optional、default nil、writer empty-gate 防止 synthetic empty )。v3.14.1 sub-stack C-CONT closes 
triple-confirmed P0 (R2 + R5 + Codex 6-AI verify):recognizedRprChildren Set 列了 ~16+ rPr child kinds 為 'recognized' 但 parseRunProperties 沒有 typed extraction → silent drop。受影響的常見元素:(character spacing)、/(run shading)、(CJK emphasis marks)、///////。Fix:trim Set 到 ONLY actually-typed-extracted-or-emitted kinds。Round-trip size loss: pre-fix v3.13.x 32% → v3.14.0 17.75% → v3.14.1 16.66%。Methodology lesson (6th):P2 from one reviewer can become P0 when another applies real-world impact lens. v3.14.0 closes #60(sub-stack C of #58/#59/#60)— RunProperties field-loss audit。Bump ooxml-swift v0.19.13→v0.20.0。新增 typed fields:4-axis rFonts (ascii/hAnsi/eastAsia/cs/hint — 之前被收斂成單一值)、noProof、kern、3-axis lang (val/eastAsia/bidi),加上 rawChildren passthrough 處理 unrecognized rPr children(如 w14:textOutline / w14:textFill / w14:glow)。**Pre-fix MCP 用戶看到 eastAsia/cs 字型(如 DFKai-SB 用於繁體中文)在 round-trip 時 silently 被替換成 ascii 值;v3.14.0 完整保留 4 個 axis**。Matrix-pin testDocumentContentEqualityInvariant 加上 preservation-class-3 ratio-floor assertions,現在 LOAD-BEARING — 任何未來 RunProperties regression 都會被 matrix-pin 抓到。Thesis fixture document.xml round-trip 大小:pre-fix 32% 損失 → post-sub-stack-C 17.75% 損失(改善 14.25 percentage points)。剩餘 17.75% 是 paragraph-mark rPr + w14:paraId/textId drops(separate out-of-scope follow-up SDD)。**'if not typed, preserve as raw' 原則架構性完成** — 從 sub-stack A (#58 BodyChild)、B (#59 WhitespaceOverlay) 一路發展到 C (#60 RunProperties)。Backward compatible — 保留 fontName field,mirror rFonts.ascii。v3.13.13 CRITICAL HOTFIX (sub-stack B-CONT-2-CONT) reverted v3.13.12 的 TIER-0 over-fix。v3.13.12 (DO NOT USE — 刪除 內容)。v3.13.11 sub-stack B-CONT。基於 ooxml-swift v0.20.0。", + "author": { + "name": "Che Cheng" + }, + "source": "./plugins/che-word-mcp", + "category": "productivity" + }, + { + "name": "macdoc", + "version": "1.1.0", + "description": "macOS 原生文件處理 CLI — 格式轉換、VLM OCR(含 host profile 設定)、SRT 處理", + "author": { + "name": "Che Cheng" + }, + "source": "./plugins/macdoc", + "category": "productivity" + 
}, + { + "name": "che-pdf-mcp", + "version": "0.1.0", + "description": "PDF 文件處理 MCP server — PDFKit 解析與文字提取、Vision OCR(原生 macOS)、圖片/區域擷取、亂碼區域偵測。v0.1.0: 首次 marketplace 發布(signed + notarized universal binary)。", + "author": { + "name": "Che Cheng" + }, + "source": "./plugins/che-pdf-mcp", + "category": "productivity" + }, + { + "name": "che-pptx-mcp", + "version": "0.1.0", + "description": "PowerPoint (.pptx) MCP server — PresentationML 解析與生成:slides、shapes、tables、notes、theme、markdown 匯出。v0.1.0: 首次 marketplace 發布(signed + notarized universal binary)。", + "author": { + "name": "Che Cheng" + }, + "source": "./plugins/che-pptx-mcp", + "category": "productivity" + } + ] +} diff --git a/CLAUDE.md b/CLAUDE.md index 5203956..85af657 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,6 +35,8 @@ This file provides guidance to Claude Code when working with code in this reposi **macdoc** 是一個原生 macOS 文件處理工具集,專注於文件格式解析、轉換和 OCR 功能。整個專案使用 Swift 開發,充分利用 Apple 平台的原生能力。 +本 repo 同時是 **Claude Code plugin marketplace**(`.claude-plugin/marketplace.json` + `plugins/`,2026-07 起,#112):發布 `che-word-mcp`、`che-pdf-mcp`、`che-pptx-mcp`、`macdoc` 四個 plugins,使用者以 `claude plugin marketplace add PsychQuant/macdoc` 安裝。注意 `plugins/`(plugin shells,正常入版控)與 `packages/`(gitignored 本地套件)的差異;MCP shells 的 wrapper 從各 binary repo 的 GitHub Releases 自動下載 signed binary。發布新版時同步 bump `plugins/<plugin>/.claude-plugin/plugin.json` 與 `.claude-plugin/marketplace.json` 兩處版本。 + ## Project Structure ``` diff --git a/README.md b/README.md index 111eec0..e4d8aae 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,26 @@ 原生 macOS 文件處理工具集,專注於文件格式轉換和 OCR。使用 Swift 開發,充分利用 Apple 平台原生能力(PDFKit、Vision.framework)。 +## Claude Code Plugin Marketplace + +本 repo 同時是 Claude Code plugin marketplace,提供 macdoc 生態系的 4 個 plugins: + +| Plugin | 內容 | +|--------|------| +| `che-word-mcp` | Word (.docx) MCP server(OOXML 讀寫,218+ 工具) | +| `che-pdf-mcp` | PDF MCP server(PDFKit 解析、Vision OCR) | +| `che-pptx-mcp` | PowerPoint (.pptx) MCP server(PresentationML 解析與生成) | 
+| `macdoc` | macdoc CLI 使用指南 skill | + +```bash +claude plugin marketplace add PsychQuant/macdoc +claude plugin install che-word-mcp@macdoc # 或 che-pdf-mcp / che-pptx-mcp / macdoc +``` + +MCP plugins 的 wrapper 會自動從各 repo 的 GitHub Releases 下載 signed + notarized universal binary。 + +> 遷移註記:`che-word-mcp` 與 `macdoc` 兩個 plugins 原先發布於 `psychquant-claude-plugins` marketplace,自 2026-07 起以本 marketplace 為準。 + ## Prerequisites - **macOS 14+**(Sonoma 或更新) diff --git a/plugins/che-pdf-mcp/.claude-plugin/plugin.json b/plugins/che-pdf-mcp/.claude-plugin/plugin.json new file mode 100644 index 0000000..a1c94f0 --- /dev/null +++ b/plugins/che-pdf-mcp/.claude-plugin/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "che-pdf-mcp", + "description": "PDF 文件處理 MCP server — PDFKit 解析與文字提取、Vision OCR(原生 macOS)、圖片/區域擷取、亂碼區域偵測。 v0.1.0: 首次 marketplace 發布。", + "version": "0.1.0", + "author": { + "name": "Che Cheng" + } +} diff --git a/plugins/che-pdf-mcp/.mcp.json b/plugins/che-pdf-mcp/.mcp.json new file mode 100644 index 0000000..3e4504a --- /dev/null +++ b/plugins/che-pdf-mcp/.mcp.json @@ -0,0 +1,7 @@ +{ + "pdf": { + "type": "stdio", + "command": "${CLAUDE_PLUGIN_ROOT}/bin/che-pdf-mcp-wrapper.sh", + "description": "PDF 文件處理 MCP server — PDFKit 解析與文字提取、Vision OCR(原生 macOS)、圖片/區域擷取、亂碼區域偵測。" + } +} diff --git a/plugins/che-pdf-mcp/CHANGELOG.md b/plugins/che-pdf-mcp/CHANGELOG.md new file mode 100644 index 0000000..18a6939 --- /dev/null +++ b/plugins/che-pdf-mcp/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to the che-pdf-mcp plugin shell will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
+ +## [0.1.0] - 2026-07-02 + +### Added + +- 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 +- `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pdf-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pdf-mcp/README.md b/plugins/che-pdf-mcp/README.md new file mode 100644 index 0000000..98fe6ba --- /dev/null +++ b/plugins/che-pdf-mcp/README.md @@ -0,0 +1,16 @@ +# che-pdf-mcp + +PDF 文件處理 MCP server — PDFKit 解析與文字提取、Vision OCR(原生 macOS)、圖片/區域擷取、亂碼區域偵測。 + +## 安裝 + +```bash +claude plugin marketplace add PsychQuant/macdoc +claude plugin install che-pdf-mcp@macdoc +``` + +Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pdf-mcp/releases) 下載 signed + notarized 的 `ChePDFMCP` universal binary 到 `~/bin/`。 + +## 原始碼 + +https://github.com/PsychQuant/che-pdf-mcp diff --git a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh new file mode 100755 index 0000000..32d23bd --- /dev/null +++ b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Version-aware auto-download wrapper for ChePDFMCP. +# +# Design: +# - Reads desired version from plugin.json (plugin's intended binary version) +# - Compares against ~/bin/.ChePDFMCP.version sidecar +# - Re-downloads when plugin has been updated but binary is stale +# - Atomic file swap (.tmp + mv) so partial downloads never break things; curl --fail + non-empty check so an HTTP error page is never installed as the binary +# - Falls back to releases/latest if plugin.json unreadable or pinned tag missing +# + +set -u + +REPO="PsychQuant/che-pdf-mcp" +BINARY_NAME="ChePDFMCP" +INSTALL_DIR="$HOME/bin" +BINARY="$INSTALL_DIR/$BINARY_NAME" +VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version" + +# Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT +# which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh. +PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" + +# Read desired version from plugin.json (empty string on any failure → fallback to "latest"). +DESIRED_VERSION="" +if [[ -f "$PLUGIN_JSON" ]]; then + DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ + | head -1 | cut -d'"' -f4 || true) +fi + +# Read currently installed version from sidecar (empty string if file missing/unreadable). +INSTALLED_VERSION="" +[[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true) + +# Decide whether to download. +NEED_DOWNLOAD=false +REASON="" +if [[ ! -x "$BINARY" ]]; then + NEED_DOWNLOAD=true + REASON="binary not installed" +elif [[ -n "$DESIRED_VERSION" ]] && [[ "$INSTALLED_VERSION" != "$DESIRED_VERSION" ]]; then + NEED_DOWNLOAD=true + REASON="plugin wants v${DESIRED_VERSION}, installed is v${INSTALLED_VERSION:-unknown}" +fi + +if $NEED_DOWNLOAD; then + echo "$BINARY_NAME: $REASON — downloading from $REPO..." >&2 + mkdir -p "$INSTALL_DIR" + + # Try pinned tag first, then fall back to latest release. + URL="" + for API_URL in \ + "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \ + "https://api.github.com/repos/$REPO/releases/latest" + do + [[ -z "$API_URL" ]] && continue + URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \ + | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \ + | sed 's/.*"\(https[^"]*\)".*/\1/') + [[ -n "$URL" ]] && break + done + + if [[ -z "$URL" ]]; then + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2 + else + echo "$BINARY_NAME: ERROR — no download URL found at $REPO. 
Install manually: https://github.com/$REPO/releases" >&2 + exit 1 + fi + else + if curl -fsL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null && [[ -s "${BINARY}.tmp" ]]; then + chmod +x "${BINARY}.tmp" + mv "${BINARY}.tmp" "$BINARY" + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2 + else + rm -f "${BINARY}.tmp" 2>/dev/null + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2 + else + echo "$BINARY_NAME: ERROR — download failed" >&2 + exit 1 + fi + fi + fi +fi + +exec "$BINARY" "$@" diff --git a/plugins/che-pdf-mcp/skills/che-pdf-mcp/SKILL.md b/plugins/che-pdf-mcp/skills/che-pdf-mcp/SKILL.md new file mode 100644 index 0000000..6fc03d9 --- /dev/null +++ b/plugins/che-pdf-mcp/skills/che-pdf-mcp/SKILL.md @@ -0,0 +1,296 @@ +# che-pdf-mcp + +A Swift-native MCP server for PDF document manipulation using macOS native frameworks. Provides 25 tools for reading, extracting, searching, merging, OCR, and manipulating PDF files. + +## When to Use + +Use `che-pdf-mcp` when you need to: + +- Get information about PDF files (page count, metadata, version) +- Extract text content from PDF documents +- Search for specific text within PDFs +- Merge multiple PDF files into one +- Extract specific pages from a PDF +- OCR scanned documents using Vision framework +- Convert PDF to Markdown format +- Extract or render images from PDFs +- Detect PDF type (text-based, scanned, mixed) +- Rotate, split, watermark, or encrypt PDFs +- Fetch PDFs from URLs + +## Core Workflows + +### Get PDF Information + +```text +1. pdf_info(path: "/path/to/document.pdf") + → Returns page count, version, metadata, encryption status +``` + +### Extract Text + +```text +1. 
pdf_extract_text(path: "/path/to/document.pdf") + → Returns all text content + + OR with page range: + + pdf_extract_text(path: "...", start_page: 1, end_page: 5) + → Returns text from pages 1-5 +``` + +### OCR Scanned Documents + +```text +1.
pdf_ocr_text(path: "/path/to/scanned.pdf", languages: ["en-US", "zh-Hant"]) + → Returns OCR-extracted text + + OR for detailed layout: + + pdf_ocr_page(path: "...", page: 1, languages: ["en-US"]) + → Returns text blocks with position and confidence +``` + +### Search Text + +```text +1. pdf_search_text(path: "/path/to/document.pdf", query: "keyword") + → Returns matches with page numbers and context +``` + +### Merge PDFs + +```text +1. pdf_merge( + paths: ["/path/to/file1.pdf", "/path/to/file2.pdf"], + output_path: "/path/to/merged.pdf" + ) + → Creates merged PDF +``` + +### Extract Pages + +```text +1. pdf_extract_pages( + path: "/path/to/source.pdf", + pages: "1,3,5-10", + output_path: "/path/to/extracted.pdf" + ) + → Creates PDF with specified pages +``` + +### Convert to Markdown + +```text +1. pdf_to_markdown(path: "/path/to/document.pdf") + → Returns Markdown with YAML frontmatter + + OR save to file: + + pdf_to_markdown(path: "...", output_path: "/path/to/output.md") +``` + +### Render Page to Image + +```text +1. pdf_render_page( + path: "/path/to/document.pdf", + page: 1, + output_path: "/path/to/page1.png", + dpi: 300 + ) + → Renders page as PNG image +``` + +### Detect PDF Type + +```text +1. pdf_detect_type(path: "/path/to/document.pdf") + → Returns type analysis (text-based, scanned, mixed) + → Recommends appropriate extraction method +``` + +### Rotate Pages + +```text +1. pdf_rotate_pages( + path: "/path/to/source.pdf", + pages: "1-3", + angle: 90, + output_path: "/path/to/rotated.pdf" + ) + → Rotates specified pages +``` + +### Split PDF + +```text +1. pdf_split( + path: "/path/to/source.pdf", + split_method: "each", # or "count:5" or "ranges:1-3,4-6" + output_directory: "/path/to/output" + ) + → Creates multiple PDF files +``` + +### Add Watermark + +```text +1. 
pdf_add_watermark( + path: "/path/to/source.pdf", + text: "CONFIDENTIAL", + output_path: "/path/to/watermarked.pdf" + ) + → Adds text watermark to all pages +``` + +### Encrypt PDF + +```text +1. pdf_encrypt( + path: "/path/to/source.pdf", + user_password: "secret123", + output_path: "/path/to/encrypted.pdf" + ) + → Creates password-protected PDF +``` + +### Fetch from URL + +```text +1. pdf_url_fetch( + url: "https://example.com/document.pdf", + save_path: "/path/to/local.pdf" + ) + → Downloads and opens PDF from URL +``` + +### Session-based Operations + +```text +1. pdf_open(path: "/path/to/document.pdf") + → Returns document_id + +2. pdf_extract_text(doc_id: "...") + pdf_search_text(doc_id: "...", query: "...") + pdf_ocr_text(doc_id: "...") + +3. pdf_close(doc_id: "...") + → Clean up when done +``` + +## Tool Reference + +### Document Access (6 tools) + +- `pdf_info` - Get PDF metadata (pages, version, author, title, etc.) +- `pdf_list` - List PDF files in a directory +- `pdf_open` - Open PDF and get document ID for subsequent operations +- `pdf_close` - Close an open document +- `pdf_list_open` - List all currently open documents +- `pdf_page_count` - Get number of pages + +### Text Operations (3 tools) + +- `pdf_extract_text` - Extract plain text (with optional page range) +- `pdf_search_text` - Search for text with context +- `pdf_extract_text_with_layout` - Get text with position information + +### Document Operations (3 tools) + +- `pdf_merge` - Combine multiple PDFs into one +- `pdf_extract_pages` - Extract specific pages (supports "1,3,5-10" format) +- `pdf_save` - Save changes to an open document + +### OCR (2 tools) + +- `pdf_ocr_text` - Extract text from scanned PDFs using Vision OCR +- `pdf_ocr_page` - OCR single page with position and confidence info + +### Structured Output (2 tools) + +- `pdf_to_markdown` - Convert PDF to Markdown format +- `pdf_get_outline` - Get PDF outline/table of contents + +### Image Processing (2 tools) + +- 
`pdf_extract_images` - Extract embedded images from PDF +- `pdf_render_page` - Render PDF page to image file (PNG/JPG) + +### Detection (2 tools) + +- `pdf_detect_type` - Detect PDF type (text/scanned/mixed) +- `pdf_check_accessibility` - Check accessibility features + +### Advanced Operations (5 tools) + +- `pdf_rotate_pages` - Rotate pages (90, 180, 270 degrees) +- `pdf_split` - Split PDF into multiple files +- `pdf_add_watermark` - Add text watermark +- `pdf_encrypt` - Password-protect PDF +- `pdf_url_fetch` - Fetch PDF from URL + +## Tips + +1. **Direct path vs session**: For single operations, use `path` parameter directly. For multiple operations on the same file, use `pdf_open` first to get a `doc_id`. + +2. **Page numbers are 1-indexed**: First page is page 1, not page 0. + +3. **Page specification format**: Use comma-separated values and ranges like "1,3,5-10,15". + +4. **Search is case-insensitive by default**: Use `case_sensitive: true` if needed. + +5. **Merge order matters**: Files are merged in the order provided in the `paths` array. + +6. **OCR languages**: Use Vision-supported language codes like "en-US", "zh-Hant", "ja", "ko". + +7. **PDF type detection**: Use `pdf_detect_type` first to determine if OCR is needed. + +8. **DPI for rendering**: Default is 150 DPI; use 300 for print quality. 
+ +## Examples + +### Analyze a Research Paper + +```text +Get info about ~/Documents/paper.pdf +Detect if it's text-based or scanned +Extract text (or use OCR if scanned) +Search for "methodology" to find relevant sections +Convert to Markdown for easier reading +``` + +### Combine Reports + +```text +Merge quarterly reports: +- ~/Reports/Q1.pdf +- ~/Reports/Q2.pdf +- ~/Reports/Q3.pdf +- ~/Reports/Q4.pdf +into ~/Reports/annual-report.pdf +``` + +### Process Scanned Document + +```text +Detect type of ~/Documents/old_scan.pdf +If scanned, use pdf_ocr_text with appropriate languages +Convert to Markdown for text processing +``` + +### Secure a Document + +```text +Add "CONFIDENTIAL" watermark to ~/Documents/sensitive.pdf +Encrypt with password protection +Save to ~/Documents/secured.pdf +``` + +### Extract Chapter + +```text +Get outline of ~/Books/textbook.pdf +Extract pages 50-80 (Chapter 3) +Save as ~/Excerpts/chapter3.pdf +``` diff --git a/plugins/che-pptx-mcp/.claude-plugin/plugin.json b/plugins/che-pptx-mcp/.claude-plugin/plugin.json new file mode 100644 index 0000000..96eb3d1 --- /dev/null +++ b/plugins/che-pptx-mcp/.claude-plugin/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "che-pptx-mcp", + "description": "PowerPoint (.pptx) MCP server — PresentationML 解析與生成:slides、shapes、tables、notes、theme、markdown 匯出。 v0.1.0: 首次 marketplace 發布。", + "version": "0.1.0", + "author": { + "name": "Che Cheng" + } +} diff --git a/plugins/che-pptx-mcp/.mcp.json b/plugins/che-pptx-mcp/.mcp.json new file mode 100644 index 0000000..3d8779f --- /dev/null +++ b/plugins/che-pptx-mcp/.mcp.json @@ -0,0 +1,7 @@ +{ + "pptx": { + "type": "stdio", + "command": "${CLAUDE_PLUGIN_ROOT}/bin/che-pptx-mcp-wrapper.sh", + "description": "PowerPoint (.pptx) MCP server — PresentationML 解析與生成:slides、shapes、tables、notes、theme、markdown 匯出。" + } +} diff --git a/plugins/che-pptx-mcp/CHANGELOG.md b/plugins/che-pptx-mcp/CHANGELOG.md new file mode 100644 index 0000000..37a118a --- /dev/null +++ 
b/plugins/che-pptx-mcp/CHANGELOG.md @@ -0,0 +1,12 @@ +# Changelog + +All notable changes to the che-pptx-mcp plugin shell will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). + +## [0.1.0] - 2026-07-02 + +### Added + +- 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 +- `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pptx-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pptx-mcp/README.md b/plugins/che-pptx-mcp/README.md new file mode 100644 index 0000000..5f7dedd --- /dev/null +++ b/plugins/che-pptx-mcp/README.md @@ -0,0 +1,16 @@ +# che-pptx-mcp + +PowerPoint (.pptx) MCP server — PresentationML 解析與生成:slides、shapes、tables、notes、theme、markdown 匯出。 + +## 安裝 + +```bash +claude plugin marketplace add PsychQuant/macdoc +claude plugin install che-pptx-mcp@macdoc +``` + +Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pptx-mcp/releases) 下載 signed + notarized 的 `ChePPTXMCP` universal binary 到 `~/bin/`。 + +## 原始碼 + +https://github.com/PsychQuant/che-pptx-mcp diff --git a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh new file mode 100755 index 0000000..8373117 --- /dev/null +++ b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Version-aware auto-download wrapper for ChePPTXMCP. 
+# +# Design: +# - Reads desired version from plugin.json (plugin's intended binary version) +# - Compares against ~/bin/.ChePPTXMCP.version sidecar +# - Re-downloads when plugin has been updated but binary is stale +# - Atomic file swap (.tmp + mv) so partial downloads never break things; curl --fail + non-empty check so an HTTP error page is never installed as the binary +# - Falls back to releases/latest if plugin.json unreadable or pinned tag missing +# + +set -u + +REPO="PsychQuant/che-pptx-mcp" +BINARY_NAME="ChePPTXMCP" +INSTALL_DIR="$HOME/bin" +BINARY="$INSTALL_DIR/$BINARY_NAME" +VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version" + +# Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT +# which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh. +PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" + +# Read desired version from plugin.json (empty string on any failure → fallback to "latest"). +DESIRED_VERSION="" +if [[ -f "$PLUGIN_JSON" ]]; then + DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ + | head -1 | cut -d'"' -f4 || true) +fi + +# Read currently installed version from sidecar (empty string if file missing/unreadable). +INSTALLED_VERSION="" +[[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true) + +# Decide whether to download. +NEED_DOWNLOAD=false +REASON="" +if [[ ! -x "$BINARY" ]]; then + NEED_DOWNLOAD=true + REASON="binary not installed" +elif [[ -n "$DESIRED_VERSION" ]] && [[ "$INSTALLED_VERSION" != "$DESIRED_VERSION" ]]; then + NEED_DOWNLOAD=true + REASON="plugin wants v${DESIRED_VERSION}, installed is v${INSTALLED_VERSION:-unknown}" +fi + +if $NEED_DOWNLOAD; then + echo "$BINARY_NAME: $REASON — downloading from $REPO..." >&2 + mkdir -p "$INSTALL_DIR" + + # Try pinned tag first, then fall back to latest release. 
+ URL="" + for API_URL in \ + "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \ + "https://api.github.com/repos/$REPO/releases/latest" + do + [[ -z "$API_URL" ]] && continue + URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \ + | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \ + | sed 's/.*"\(https[^"]*\)".*/\1/') + [[ -n "$URL" ]] && break + done + + if [[ -z "$URL" ]]; then + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2 + else + echo "$BINARY_NAME: ERROR — no download URL found at $REPO. Install manually: https://github.com/$REPO/releases" >&2 + exit 1 + fi + else + if curl -fsL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null && [[ -s "${BINARY}.tmp" ]]; then + chmod +x "${BINARY}.tmp" + mv "${BINARY}.tmp" "$BINARY" + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2 + else + rm -f "${BINARY}.tmp" 2>/dev/null + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2 + else + echo "$BINARY_NAME: ERROR — download failed" >&2 + exit 1 + fi + fi + fi +fi + +exec "$BINARY" "$@" diff --git a/plugins/che-word-mcp/.claude-plugin/plugin.json b/plugins/che-word-mcp/.claude-plugin/plugin.json new file mode 100644 index 0000000..5f44475 --- /dev/null +++ b/plugins/che-word-mcp/.claude-plugin/plugin.json @@ -0,0 +1,17 @@ +{ + "name": "che-word-mcp", + "version": "3.20.0", + "description": "v3.20.0 cross-document OMath splice MCP tools (#160) — splice_omath_from_source + splice_paragraph_omath_from_source wrap ooxml-swift v0.24.0's spliceOMath API for verbatim copy of XML blocks between WordDocument paragraphs. Source via source_path (Direct mode read-only) or source_doc_id (Session mode); target requires session-mode doc_id. Position via atStart/atEnd/afterText/beforeText with optional anchor + instance. 
Carrier preservation (Run.rawXML stays inline, unrecognizedChildren stays direct-child); joint document-order index for omath_index across both source carriers. rpr_mode controls source Run rPr propagation (full default verbatim / omathOnly whitelist / discard empty); namespace_policy controls prefix vs URI handling (lenient default accepts mml: vs m: prefix mismatch with same URI per ECMA-376 / strict throws on any mismatch). Error taxonomy returned as structured strings: sourceHasNoOMath / omathIndexOutOfRange / targetParagraphOutOfRange / anchorNotFound / namespaceMismatch / contextAnchorNotFound (batch only). Unblocks kiki830621/collaboration_guo_analysis Phase 7 inline-math restoration pipeline. Tests: 8 new Issue160SpliceOMathFromSourceTests (Direct/Session source modes, atEnd/afterText positions, error taxonomy, batch mode, rPr discard). Full suite: 297 passing, 0 failures, 9 pre-existing skips. Bumps ooxml-swift dep 0.21.0 → 0.24.0. v3.19.0 caption detection (#136) + estimate_paragraph_for_page structural weights v2 (#142). #136: two-layer detection — Paragraph.style primary + expanded prefix set (English Tab./Fig./Listing + CJK 表3/圖1 no-separator + U+3000 ideographic space) with digit-after-prefix guard rejecting body sentences like \"Table reservations are required...\". 17 new tests. #142: walker upgrade getParagraphs → collectStructuralBlocks enum (.paragraph/.table/.imageOnlyParagraph/.displayEquationParagraph). Per-block weights (12pt thesis calibration): table = tableRows × avgCellChars (200/row fallback), image = +200/drawing, display eq = 120 chars. New structural_breakdown metadata (9 sub-fields). API method bumped char_count_heuristic → char_count_heuristic_v2. ~95× thesis figure-counting accuracy improvement. paragraph_index semantics + getParagraphs() preserved (30+ other callers unaffected). Tests: 6 new + #89 backward-compat passes (text-only fixtures unchanged, just method literal updated). 
1 P3 follow-up filed (#159 display-eq fixture limitation). v3.18.1 transitive dependency bump — ooxml-swift v0.21.11 → v0.22.1. Closes PsychQuant/che-word-mcp#155 (parent / mirror) auto-resolves via Package.resolved bump alone (no MCP source change). Pre-fix: che-word-mcp__search_text MCP tool's Server.swift:10310 calls para.getText(), which was a legacy 7-line implementation that only joined runs.map { $0.text } + hyperlink.text — missed every walker enhancement landed in flattenedDisplayText() over the #85 / #92 / #99 / #100 / #101 / #102 / #103 cluster. Callers couldn't grep for inline math symbols (α / β / γ / θ / λ / t) — silent zero gaps in match positions. Post-fix: ooxml-swift#43 collapsed Paragraph.getText() to single-line return flattenedDisplayText(); two text-extraction paths now return identical strings. Server.swift:10310 still calls para.getText() but that method now traverses inline OMML correctly. Position arithmetic now matches before_text / after_text anchor-matching paths. Downstream impact: kiki830621/collaboration_guo_analysis#6 (thesis 30 inline math symbols) unblocked from automation — re-running search_text 進行t檢定 against 碩士論文.docx para 324 now returns a match. Released via /idd-all #43 --cwd cross-repo IDD orchestration (issue-driven-dev v2.40.0). Closing summary at https://github.com/PsychQuant/ooxml-swift/issues/43#issuecomment-4365430488. v3.18.0 insert_equation argument-contract hardening (closes #105 #106 #107). v3.17.8 closes PsychQuant/che-word-mcp#98 — insert_equation MCP handler refactored across 3 commits (91506f8/357cbe7/339ab77). 
Pre-fix three structural defects: (1) silent-clamp on out-of-range paragraph_index via non-throwing insertParagraph(_:at: Int) — tool reported success but inserted at wrong location; (2) lib's #84/#91 InsertLocation overload + InsertLocationError.inlineModeRequiresParagraphIndex / .invalidParagraphIndex(Int) structured errors never reached MCP callers (handler self-built OMML and bypassed lib); (3) inline mode (display_mode=false) always created a NEW Paragraph(runs: [eqRun]) instead of appending OMML run to the EXISTING paragraph at paragraph_index. v1 (91506f8) tried delegating to lib but Codex 6-AI verify caught P1 regression: lib's Document.insertEquation overload internally uses @available(*, deprecated, ...) MathEquation flat output (Field.swift:301) — emits truncated '(a)/(b' AND nested invalid OOXML. v2 (357cbe7) unified handler — both latex AND components paths use MathComponent.toOMML() for structurally correct OMML; display mode routes through lib's throwing insertParagraph(_:at: InsertLocation); inline mode handler-side appends OMML run to existing paragraph. v3 (339ab77) restored eqPara.properties.alignment = .center for display mode. BREAKING for inline mode callers: pre-fix inserted NEW paragraph; post-fix appends OMML run to EXISTING paragraph at paragraph_index. Migration: use display_mode=true for 'new paragraph with equation' or call insert_paragraph + insert_equation separately. 7 NEW Issue98InsertEquationLibBypassTests pin contracts (5 RED→GREEN scenarios + 2 quality regression tests including unzip -p document.xml verifying structure + centering survives + deprecated '(a)/(b)' pattern absent). 6-AI verify ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) caught both P1 regressions in v1; Codex sanity check on v2 caught the centering P2. 6 P2/P3 follow-up issues filed (#105-#110). Suite: 236 → 243 tests, 0 failures, 9 pre-existing skips. Backward compatible except inline-mode BREAKING (documented). 
v3.17.7 ooxml-swift dep bump 0.21.10 → 0.21.11 — closes 5-issue cluster PsychQuant/che-word-mcp #99 + #100 + #101 + #102 + #103. Bilateral mirror coverage for direct-child OMML at 4 wrapper positions ( direct child for Pandoc display math / direct child / direct child / nested wrapper combos), plus 2 NEW library-wide spec capabilities. Pre-fix Paragraph.flattenedDisplayText AND Document.replaceInParagraphSurfaces shared a symmetric blind spot: direct-child / (not wrapped in ) was silently dropped → anchor lookups against paragraphs containing display math silently 0-matched. Spectra change flatten-replace-omml-bilateral-coverage. 2 NEW spec capabilities promoted to openspec/specs/: ooxml-paragraph-text-mirror (mirror invariant + ReplaceResult informative refusal contract) + ooxml-library-design-principles (Correctness primacy + Human-like operations as foundational normative invariants for all ooxml-swift mutators). Read side: flattenedDisplayText walks direct-child OMML at all 4 wrapper positions with source-XML position ordering. Write side: NEW public API WordDocument.replaceTextWithBoundaryDetection returns ReplaceResult enum (.replaced(count:) / .refusedDueToOMMLBoundary(occurrences:) / .mixed(replacedCount:, refusedOccurrences:)) with Occurrence(matchSpan:, ommlSpans:) carrying flattened-text coordinates. Mirror invariant — asymmetric by design: reads include OMML visibleText (anchor lookup universe extends to math); writes treat OMML as opaque structural units (refuse cross-OMML mutation rather than silently delete equations). Decision 4 raw passthrough preserved: direct-child OMML stays in Paragraph.unrecognizedChildren / HyperlinkChild.rawXML(_) / AlternateContent.rawXML — no parser change, no writer change, round-trip fidelity unaffected. MCP impact: replace_text and other anchor-lookup tools now find paragraphs containing direct-child OMML at all 4 wrapper positions; existing replace_text MCP tool unchanged (backward-compatible). 
Tests: 236 passing che-word-mcp / 813→829 ooxml-swift (+16 in Issue99FlattenReplaceOMMLBilateralTests). Backward compatible — strict superset of pre-fix behavior. v3.17.6 ooxml-swift dep bump 0.21.9 → 0.21.10 — closes PsychQuant/che-word-mcp#104. Form-level FieldParser canonical 5-run fldChar fix (orthogonal to v3.17.5's #94 container-level fix). Pre-fix update_all_fields returned silent no-op ('no SEQ fields found') on docs containing valid SEQ paragraphs at body top level when fldChar block was emitted in canonical 5-run form (each // in its own sibling — what DocxReader produces post-roundtrip and what native Word always emits). Pre-fix worked only on in-memory wrap_caption_seq output before save. Two ooxml-swift commits land via this dep bump: 537de62 FieldParser two-phase parse (Phase-1 baked form + Phase-2 parseFiveRunSpan state machine probing both Run.rawXML and Run.rawElements per recognizedRunChildren = ['rPr','t','drawing','oMath','oMathPara'] allowlist) + 58fe4f9 P1 sub-fix surfaced by 6-AI verify (Logic + Devil's Advocate runtime test): canonical-branch Run.text rewrite was silently overridden by Run.toXML() rawXML short-circuit; new rewriteCanonicalCachedText helper splices new value into embedded while preserving + xml:space=preserve, AND keeps Run.text in sync. MCP impact: update_all_fields now finds and updates SEQ fields in canonical 5-run form (post-roundtrip / native Word emission); list_captions benefits transitively via shared FieldParser. Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) — 4 PASS / 2 WARN / 0 BLOCK; production reproducer rescue-swift-v317.docx via DocxReader path confirmed working. 5 P3 follow-ups filed in ooxml-swift (#29 SEQ Table coverage / #30 multi-paragraph counter / #31 multi-SEQ same paragraph / #32 DoS hardening / #33 discriminator invariant). Tests: 236 passing che-word-mcp / 809→813 ooxml-swift (+4 sub-tests in Issue104FieldParserCanonicalFormTests). 
Backward compatible — strict superset of pre-fix behavior. v3.17.5 ooxml-swift dep bump 0.21.8 → 0.21.9 — triple #87 + #93 + #94 release. #87 (Comment.paragraphIndex flat-counter, observable behavior change): list_comments paragraph_index now consistently 0-indexed against get_paragraphs() flat list; pre-fix off-by-N for any docx with non-paragraph BodyChild siblings before commented paragraph; callers manually compensating with paragraph_index - 1 must remove compensation. #93 (wrap_caption_seq SEQ inherits source position, caption visual fix): pre-fix 「圖 4-1:xxx」 became 「圖 4-:xxx1」 because new SEQ run had position=nil while source-loaded preText/postText had position>0; one-line fix seqRun.position = preRun.position; insert_bookmark=true × source-loaded paragraph still has same gap (filed PsychQuant/ooxml-swift#24, default insert_bookmark=false unaffected). #94 (update_all_fields traverses .table and .contentControl containers): pre-fix body loop only processed top-level .paragraph BodyChild, silently skipped .table and .contentControl(_, children:) — SEQ fields inside table cells/block-level SDTs never updated, returning 'no SEQ fields found' for thesis docs (caption paragraphs commonly live inside the table they describe); same gap #68 closed for findBodyChildContainingText; new walkAndProcessBodyChildForFields recursive walker mirrors #68 pattern; heading-count semantics: only top-level direct .paragraph body children count toward chapter-reset. Known incompleteness (3 follow-ups filed): ooxml-swift#25 header/footer/footnote/endnote SEQ scans still flat .paragraphs view; ooxml-swift#26 FieldParser.parse(paragraph:) misses inline SDT/hyperlink/fieldSimple/alternateContent surfaces; ooxml-swift#27 verify-with-user-fixture for real thesis docx roundtrip; plus ooxml-swift#28 refactor candidate (extract BodyChildVisitor protocol). Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh). Tests: 236 passing che-word-mcp / 805→809 ooxml-swift. 
Backward compatible except #87 documented behavior change. v3.17.4 paired #91 + #92 release. v3.17.3 bump ooxml-swift dep 0.21.6 → 0.21.7 (pure transitive bump, no MCP source changes; exposes public anchor lookup API). Three WordDocument APIs upgraded private → public (PsychQuant/che-word-mcp#86): findBodyChildContainingText(_:nthInstance:) instance method + bodyChildContainsText(_:needle:) static + tableContainsText(_:needle:) static. External Swift SPM consumers (rescue scripts, dxedit CLI, third-party tooling) can now call canonical anchor-lookup logic directly instead of reimplementing with diverging semantics (some skipped .contentControl recursion, some skipped table cell traversal pre-#68, some used different nthInstance counting rules). Result is exactly what insert_paragraph / insert_image_from_path / insert_caption etc. tools see when resolving after_text / before_text anchors. 10 new public-API surface tests in Issue86PublicAnchorLookupTests pin the canonical behavior across releases. Backward compatible: pure additive private → public visibility change; no API removals, no behavioral changes for existing callers. v3.17.2 bump ooxml-swift dep 0.21.2 → 0.21.6 (pure transitive bump, no MCP source changes; 4 ooxml-swift releases worth of hardening + new APIs land transparently). v0.21.3 (XML hardening, PsychQuant/ooxml-swift#7): DTD reject + 64KB attr-value cap + SAX-based root-element attribute parsing + name whitelist on emit. New XMLHardeningError throws on malicious .docx input. v0.21.4 (roundtrip loud-fail, PsychQuant/ooxml-swift#6): AlternateContent.fallbackRunsModified dirty flag throws RoundtripError.unserializedFallbackEdit on stale fallbackRuns mutation. Run.commentIds @available deprecated; migrate to commentRangeMarkers. v0.21.5 (insertEquation flexibility, PsychQuant/che-word-mcp#84 #85): InsertLocation overload for Document.insertEquation + flattenedDisplayText OMML coverage extends anchor lookup beyond plain runs. 
v0.21.6 (mutation surface, PsychQuant/ooxml-swift#5): Hyperlink.text setter @available deprecated (lossy); migrate to .runs property. Position field cascade Int = 0 → Int? = nil across 13 typed-child models. xml:space=preserve autosense in Run.toXMLThrowing emit. Plus 3 unreleased docs commits on ooxml-swift main (PsychQuant/ooxml-swift#14 #15 #17 + corrective cd841e7) covering needsPPr emit-gate ↔ Issue4 lock-in test bidirectional reference, parseRun vs parseParagraph walker pattern divergence rationale, foreign-namespace pPr asymmetry documentation. Tests: 236 passing (no regressions). Backward compatible: deprecation warnings only; no API removals. v3.17.1 bump ooxml-swift dep to v0.21.2 — pulls in pPr regression-guard + test infrastructure hardening from upstream (ooxml-swift#4 walker whitelist + #if DEBUG assert / ooxml-swift#13 empty self-closing test gap / ooxml-swift#16 countPPrOpenTags regex hardening excluding ). No public MCP tool change. v3.17.0 wrap_caption_seq MCP tool (Refs #62): Phase 2 of cross-repo work — exposes ooxml-swift v0.21.0 lib API as MCP tool. Bulk-wraps plain-text caption number portions in SEQ field runs across body paragraphs whose flattened text matches a regex (EXACTLY ONE numeric capture group). Captured digit becomes SEQ field cachedResult so Word's first-open render preserves user-typed numbering before F9. Rescues docs pasted from external sources (LaTeX-converted Word, Google Docs, Pandoc) so insert_table_of_figures / insert_table_of_tables produce populated TOFs. Idempotent: paragraphs already wrapping a SEQ field for sequence_name reported in skipped, never double-wrapped (detection covers both FieldSimple AND rawXML fldChar emissions). Phase 1 ships scope:body only (recurses into table cells + nestedTables + block-level SDT children); scope:all returns Error: scope_not_implemented for now (cross-container path lands in v3.17.x). 
Bookmark wrap opt-in (insert_bookmark + bookmark_template with literal ${number}) so default 23-caption rescue does NOT pollute list_bookmarks. Returns JSON: {matched_paragraphs, fields_inserted, paragraphs_modified:[idx,...], skipped:[{paragraph_index, reason},...]}. All preconditions checked BEFORE document mutation (regex compile + capture-group count + format/scope enums + bookmark_template invariant + doc_id opened). Tests: 5 new sub-tests in Issue62WrapCaptionSeqTests covering Scenarios 1-5. Suite 231 → 236 (+5, 0 fail / 9 skip). No ooxml-swift dep bump (still v0.21.0 from v3.16.2). v3.16.2 ooxml-swift dep bump 0.20.5 → 0.21.0 (Refs #62 #68): pure dep bump, no MCP source changes. Picks up two ooxml-swift fixes that surface transparently via existing tool dispatch. #68 (ooxml-swift v0.20.6): InsertLocation.findBodyChildContainingText now traverses .table (rows × cells × paragraphs + nestedTables) and .contentControl(_, children:) (recursive). MCP impact — insert_paragraph / insert_image_from_path / insert_equation / insert_caption calls using before_text / after_text now succeed when anchor text lives inside a table cell or block-level SDT (common in thesis docs with figure/table captions inside table cells). Returned position is top-level body.children index of the containing structure. Use into_table_cell for inside-cell inserts. Empty-needle guard: passing before_text:'' / after_text:'' now returns textNotFound instead of silently inserting at index 1. #62 (ooxml-swift v0.21.0): WordDocument.wrapCaptionSequenceFields(...) is now linked into the binary. Not yet exposed as an MCP tool — the wrap_caption_seq MCP wrapper ships in v3.17.0 (Phase 2 of the cross-repo work). Existing MCP tools unaffected. Suite: 231 → 231 (0 fail / 9 skip). v3.16.1 anchorPresence whitelist drift prevention (Refs #80): pure refactor, no runtime behavior change. 
New static toolAnchorWhitelists dict (single source of truth, keyed by MCP tool name → accepted anchor list) + new detectPresentAnchors(_:tool:) overload. 4 conflict-detection call sites switched from literal anchor arrays to (tool:) lookup. 4 new invariant/parity tests. Suite 227 → 231 (+4). Old (args, anchors:) overload preserved. Out-of-scope follow-ups: schema descriptions + dispatcher if-else chains still hardcode anchor names (pre-existing surfaces, not introduced by this PR). v3.16.0 Bundle B anchor DX consistency (Refs #70 #71 #72): BREAKING (input validation only) — three coordinated MCP-layer changes across the 4 #61-target tools. #71 (behavior) silent priority on conflicting anchors → structured error: insert_paragraph(after_text + index) was previously silent-priority; now returns 'Error: insert_paragraph: received conflicting anchors: after_text + index. Specify exactly one.' New static helper detectPresentAnchors with per-anchor type-aware predicates (null and wrong-type values do NOT count). #72 (validation) explicit text_instance ≤ 0 rejected — 'Error: : text_instance must be ≥ 1, got .' Omitted text_instance still defaults to 1. #70 (DX) all 32 'return Error:' lines in 4 #61-target tools rewritten as 'Error: : ' for AI-caller error attribution. throw WordError.* paths unchanged. Scope deliberately limited to 4 tools; remaining 41 return Error lines elsewhere deferred to error-prefix-sweep follow-up. SemVer rationale (minor not major): no schema break, no tool removal, restricting previously-undefined behavior. Tests: 201 → 227 (+26 sub-tests, 0 fail / 9 skip). No ooxml-swift dep bump (still v0.20.5). 
v3.15.3 Bundle A2 polish from v3.15.2 verify R3-R6 follow-ups (Refs #76 #77 #78 #79): #76 (docs) insert_caption description corrected from '三種 anchor' to enumerate all 5 (paragraph_index / after_image_id / after_table_index / after_text / before_text); insert_equation paragraph_index description clarified that the int is body.children-indexed (cross-references PsychQuant/ooxml-swift#10 for the lib-layer convention split). #77 (docs) insert_caption anchor set wording precision in CHANGELOG / manifest / marketplace.json / plugin.json — was 'its own anchor set including after_table_index' (implies disjoint), now 'shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell' (explicit shared/adds/lacks). #78 (test) extends #69 append-index regression pin to bookmarkMarker / rawBlockElement / block-level contentControl body-children — the table case alone wouldn't catch a regression to getParagraphs().count - 1 that breaks for SDT / TOC bookmark / vendor extensions. #79 (test) adds round-trip depth: testInsertParagraphAppendIndexRoundTripsForInsertCalls demonstrates insert-family round-trip works (append + insert(N+1) + verify ordering); testInsertParagraphAppendIndexCannotRoundTripToUpdate pins the cross-family trade-off (update_paragraph(index=N) throws WordError.invalidIndex). Tests: 196 → 201 (+5 sub-tests, 0 fail / 9 skip). No production code change. No ooxml-swift dep bump (still v0.20.5). 
v3.15.2 closes Bundle A polish from #61 R2 verify (Refs #69 #73 #74 #75): #69 (bug) insert_paragraph append message reports body.children index instead of getParagraphs().count - 1 (mis-reported in docs with tables/SDTs by skipping table children); #74 (bug) insert_image_from_path debug log labels after_image_id correctly (was silently labeled 'index' since v3.15.1); #73 (test) regression pin for equation F5 partial-dict guard (existed since v3.15.1 but was untested); #75 (docs) clarifies '3 insert tools' wording — scope is the 3 #61-target tools (insert_paragraph / insert_equation / insert_image_from_path); insert_caption is a 4th insert tool with a partially-overlapping anchor set (shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell), intentionally outside this unification scope. Tests: 194 → 196 (0 fail / 9 skip). No behavior change in normal call paths. No ooxml-swift dep bump (still v0.20.5). Word MCP Server - Swift 原生 OOXML 操作,233 個工具。v3.15.1 closes verify findings F1+F2+F3+F5 from v3.15.0 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh):F1 (P1) `after_image_id` anchor 加到 insert_paragraph + insert_equation (display only) + insert_image_from_path — lib InsertLocation.afterImageId 從 #44 起就 ready 但只有 insert_caption 暴露 MCP-layer;v3.15.0 inherited 這個 gap,本 release 補齊。F2 (P1) `into_table_cell` 加到 insert_equation (display only) — display equation 是新建 paragraph,cell 放置 well-defined;inline mode 拒絕。F3 (P2) equation 成功訊息加 anchor info('Inserted equation (display mode: true, after text X (instance N))' 等)— 關閉同 v3.14.4 LOOKUP 的 over-claim 模式(caller 之前無法區分 anchor 命中 vs append fallthrough)。F5 (P2) malformed `into_table_cell` partial dict(傳 `{table_index: 0}` 缺 row + col)silent fallthrough → 走 next anchor / append → 結果在錯位置且 caller 不知。改回 structured 'Error: into_table_cell requires all three fields',3 #61-target tools 同步修(cross-cutting consistency)。Anchor priority unified across all 3 #61-target 
insert tools (`insert_paragraph` / `insert_equation` / `insert_image_from_path`; `insert_caption` has its own anchor set):into_table_cell > after_image_id > after_text > before_text > index > append。Inline equation 拒絕擴大 — 現在拒絕所有 4 個 anchor params(before/after_text + after_image_id + into_table_cell),不只 v3.15.0 的 2 個。Tests: Issue61V315PointReleaseTests (9 sub-tests cross 3 tools)。Suite 185 → 194 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — 仍 v0.20.5(lib 從 #44 起就 ready)。Follow-up issues 另開:F4 inline equation 更通用設計 (e.g. into_paragraph_with_text) / F6 text anchor 擴及 table-cell paragraphs 與 block-level SDT / F7 getParagraphs().count - 1 message 在 doc 含 tables/SDTs 時 mis-report (pre-existing) / F8 error message 加 tool-prefix / F9 multiple anchor params 同時傳入 silent priority winner / F10 text_instance≤0 normalize。Backward compatible — schema additions optional,既有 v3.15.0 callers 不變;只有 malformed into_table_cell 從 silent fallthrough 改成 structured error(會被 buggy caller 注意到)+ equation message 加 suffix(substring 'Inserted equation' 仍存在)。v3.15.0 closes #61 — insert_paragraph 與 insert_equation 現在接受跟 insert_image_from_path 一致的 anchor 參數(after_text / before_text / text_instance / into_table_cell — into_table_cell 僅 insert_paragraph)。Pre-fix MCP 層 silently drop 這些參數 — JSON schema 接受但 handler dispatch 忽略,呼叫 fall through 到 legacy paragraph_index path 或 append at end。Lib API Document.insertParagraph(_: at: InsertLocation) 從 #44 起就支援所有六種 anchor cases(paragraphIndex / afterImageId / afterTableIndex / intoTableCell / afterText / beforeText),本 release 補齊 MCP 側 wire-up gap,無需 ooxml-swift dep bump(v0.20.5 已足夠)。Anchor priority mirror insert_image_from_path:into_table_cell > after_text > before_text > index > append。Errors(textNotFound / tableIndexOutOfRange / tableCellOutOfRange)回 structured 訊息而非 silent fallthrough — AI caller 能 surface failure 而非拿到位置錯誤的 misleading 'success'。**Inline equation explicit rejection**:insert_equation 在 display_mode=false(inline)時 explicitly 拒絕 
after_text / before_text,回 structured error — 語意模糊('append OMML run into existing para containing this text' vs 'insert new para before/after target para'),inline placement 仍用 paragraph_index。Display-mode equation 建新 paragraph,anchor 語意明確。Tests: Issue61InsertParagraphAnchorsSmokeTests(5 sub-tests:after_text resolution / before_text resolution / text_instance disambiguation / into_table_cell append / textNotFound error)+ Issue61InsertEquationAnchorsSmokeTests(4 sub-tests:after_text + before_text in display mode / inline mode rejection / textNotFound error)。Suite 176 → 185 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — v0.20.5 已有所有需要的 lib API。Backward compatible — anchor params 全 optional;既有 index / paragraph_index callers 不變;無 schema removal、無既有行為改動。**Real-world impact**:thesis-rescue / template-population workflow 不再需要 fall back 到「append at end + 手動 cut/paste in Word UI」或 binary-search 猜 paragraph_index,AI caller 對 3 #61-target insert tools(insert_image_from_path / insert_paragraph / insert_equation)對稱地用 surrounding context 定位 anchor。v3.14.5 closes Refs #63 verify F1 P1:擴充 findBodyChildContainingText 涵蓋所有 editable surfaces,補上 v3.14.4 CHANGELOG over-claim 的 insert anchor lookup gap。Pre-fix v3.14.4 只修了 REPLACE path(replace_text → Document.replaceInParagraphSurfaces 走 contentControls / hyperlinks / fieldSimples / alternateContents)但 LOOKUP path(findBodyChildContainingText 用於 InsertLocation.afterText / .beforeText 解析)只看 para.runs,所以 insert_image_from_path / insert_paragraph / insert_caption before_text/after_text 對 SDT-wrapped anchor 仍丟 textNotFound。Verify ensemble(5 Claude reviewers + Codex)的 requirements F1 P1 finding 抓到 CHANGELOG over-claim — 用戶選擇 Option B 擴充修而非縮 scope。ooxml-swift v0.20.5 新增 TextReplacementEngine.flatTextOfContentXML(read-only XML walker mirror replaceInContentXML flattening rules,跳過 / / nested subtrees)+ Paragraph.flattenedDisplayText 擴充 method 涵蓋 runs + hyperlinks + fieldSimples + alternateContents + contentControls(recursive into 
nested SDT children)。findBodyChildContainingText 改用 flattenedDisplayText 取代原本的 para.runs.map { $0.text }.joined()。新增 Issue63InsertAnchorInlineSDTTests(lib,3 wrappers × afterText/beforeText/insertImage = 3 sub-tests / 5 assertions)+ Issue63InsertAnchorInlineSDTSmokeTests(MCP,2 sub-tests pin lib-layer fix)。Suite 693 → 696 ooxml-swift / 174 → 176 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.5。Backward compatible — strict superset of pre-fix lookup behavior(找到更多 anchors,既有 plain runs anchor 仍照常運作)。**Insert anchor lookup gap 此 release 完整補齊**,所有 inline wrappers 在 REPLACE + LOOKUP 兩個 path 都對稱覆蓋。v3.14.4 修 replace_text 對 inline `<w:sdt>` content control 的 wrapper coverage gap(Refs #63):Document.replaceInParagraphSurfaces 之前覆蓋 paragraph.runs / hyperlinks / fieldSimples / alternateContents 但 **沒有** paragraph.contentControls — 包在 inline `<w:sdt>` 裡的文字 silently 0-match。外部 converter(pandoc / Quarto / LaTeX→docx)習慣把 cross-ref placeholder([tab:foo] / [fig:bar] / [Smith 2020])包成 inline SDT,所以症狀跟 bracketed text 高度相關,但其實 **brackets 是 coincidence** — bracket-free needle 在 inline SDT 裡也 fail。Issue title「literal `[ ]` brackets」是誤導,差別測試(fldChar / fldSimple / hyperlink / inlineSDT 四個 inline wrapper × 四種 needle)證實只有 inline SDT case 失敗,其他三個 wrapper 從 v0.19.0+ #56 Phase 5 起就 typed-runs 覆蓋好了。Surgical fix architecture:ooxml-swift v0.20.4 新增 TextReplacementEngine.replaceInContentXML(XML DOM walker,wrap ContentControl.content 在 synthetic root xmlns:w,遍歷所有 `<w:t>` descendants 在 document order,build flat string + offset map mirror flattenRuns invariant,run same literal/regex find logic,splice replacements 回 `<w:t>` element string content;re-serialize wrapper children 去掉 wrapper tag)+ Document.replaceInContentControl(recursive helper 涵蓋 cc.content + cc.children 處理 nested SDT)。Wired 進 Document.replaceInParagraphSurfaces 接在 alternateContents loop 之後。設計上跳過:`<w:delText>`(TC deletion text,不顯示)、`<w:instrText>`(field instruction code,不顯示)、nested `<w:sdt>` subtrees(typed cc.children 由外層 recursion 處理避免 double-replacement)。Round-trip discipline:只 mutate `<w:t>` element 
的 string content;xml:space=\"preserve\" 與其他 attribute 完整保留(attribute set 從不被 touch)。新增 Issue63InlineSDTReplaceTests(4 個 wrapper × 4 個 needle 的 differential test + nested SDT recursion + round-trip wrapper preservation = 3 sub-tests / 18 assertions)+ MCP-layer Issue63ReplaceTextInlineSDTSmokeTests(2 sub-tests pin lib-layer fix)。Suite 690 → 693 ooxml-swift / 172 → 174 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.4。Backward compatible — surgical fix 只新增 code path,沒改任何既有行為(runs/hyperlinks/fieldSimples/alternateContents replacement path 不動,ContentControl model 維持 raw XML storage 不重構)。Out-of-scope(separate follow-up):ContentControl 從 content:String 升級為 typed Run 列表(SDD-warranted refactor);smartTags / bidiOverrides / customXmlBlocks / unrecognizedChildren 維持 raw-carrier passthrough。v3.14.3 sub-stack E of paragraph-level content-equality (closes #66):Paragraph 新增 w14ParaId / w14TextId 欄位,提取並 round-trip opening tag 上的 w14:* 屬性(Word 用於 collaborative editing 和 comment threading 的 revision-tracking GUIDs)。Plain attribute passthrough,String? 
typing — Word 的 GUIDs 是 8-char hex tokens(NOT RFC 4122 UUIDs),所以 opaque-string round-trip 是正確選擇。Pre-fix v3.14.2 silently dropped 兩個 attributes — 佔了 NTPU 論文 fixture w14:* token loss 的 ~95%(2214 / 2359 lost tokens 是這兩個 attrs)。Post-E 量測:w14: 保留率 10.55% → 93.98%;document.xml 流失 10.95% → 8.02%。Combined with sub-stack D (#65), total impact since v3.14.1: 50% → 98.89% (D)、w14:* 5% → 93.98% (E)、document.xml 流失 16.66% → 8.02% (D+E, -8.64 pp)。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(w14: 0.04→0.90、sizeLossRatio 上限 0.12→0.10)— matrix-pin 現在 LOAD-BEARING across **5 preservation classes**(rFonts/noProof/lang/kern/w14:)spanning run-level + paragraph-level + paragraph-mark scope。Defensive design (R2 review fixes):openingPTag() routes attributes through escapeXMLAttribute;parseParagraph rejects schema-invalid empty-string GUIDs。基於 ooxml-swift v0.20.3。Backward compatible(兩個 fields 都 optional、default nil;openingPTag empty-attrs gate 防止 synthetic emit)。剩餘 8% 流失主要是其他 w14:* attribute classes(如 w14:* on )— tracked as separate follow-up SDD。v3.14.2 sub-stack D of paragraph-level content-equality (closes #65):ParagraphProperties 新增 markRunProperties 欄位,提取並 round-trip direct child of — paragraph-mark formatting per ECMA-376 §17.3.1.27 CT_PPrBase(控制 pilcrow ¶ 字符外觀的字型/顏色/語言/字距)。Reuses parseRunProperties verbatim — schema 跟 run-level CT_RPr 一致,所以 sub-stack C 的 typed extraction(rFonts 4-axis / noProof / kern / lang 3-axis)和 rawChildren passthrough(w14:* 效果)全部免費繼承。NTPU 論文 fixture 量測影響: 保留率 50% → 98.89%; 88% → 98.77%; 92% → 100%; 84% → 99.93%;document.xml 大小流失 16.66% → 10.95%。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(lang 0.45→0.95、rFonts/noProof/kern 0.95、sizeLossRatio 上限 0.175→0.12)。Sub-stack E (#66 w14:paraId/textId) 接著 ship 到 v3.14.3,把流失壓到 < 5%,達成「edit 一個字 → document.xml shrinks <1%」strong demo。基於 ooxml-swift v0.20.2。Backward compatible(markRunProperties optional、default nil、writer empty-gate 防止 synthetic empty )。v3.14.1 sub-stack C-CONT closes 
triple-confirmed P0 (R2 + R5 + Codex 6-AI verify):recognizedRprChildren Set 列了 ~16+ rPr child kinds 為 'recognized' 但 parseRunProperties 沒有 typed extraction → silent drop。受影響的常見元素:(character spacing)、/(run shading)、(CJK emphasis marks)、///////。Fix:trim Set 到 ONLY actually-typed-extracted-or-emitted kinds。Round-trip size loss: pre-fix v3.13.x 32% → v3.14.0 17.75% → v3.14.1 16.66%。Methodology lesson (6th):P2 from one reviewer can become P0 when another applies real-world impact lens. v3.14.0 closes #60(sub-stack C of #58/#59/#60)— RunProperties field-loss audit。Bump ooxml-swift v0.19.13→v0.20.0。新增 typed fields:4-axis rFonts (ascii/hAnsi/eastAsia/cs/hint — 之前被收斂成單一值)、noProof、kern、3-axis lang (val/eastAsia/bidi),加上 rawChildren passthrough 處理 unrecognized rPr children(如 w14:textOutline / w14:textFill / w14:glow)。**Pre-fix MCP 用戶看到 eastAsia/cs 字型(如 DFKai-SB 用於繁體中文)在 round-trip 時 silently 被替換成 ascii 值;v3.14.0 完整保留 4 個 axis**。Matrix-pin testDocumentContentEqualityInvariant 加上 preservation-class-3 ratio-floor assertions,現在 LOAD-BEARING — 任何未來 RunProperties regression 都會被 matrix-pin 抓到。Thesis fixture document.xml round-trip 大小:pre-fix 32% 損失 → post-sub-stack-C 17.75% 損失(改善 14.25 percentage points)。剩餘 17.75% 是 paragraph-mark rPr + w14:paraId/textId drops(separate out-of-scope follow-up SDD)。**'if not typed, preserve as raw' 原則架構性完成** — 從 sub-stack A (#58 BodyChild)、B (#59 WhitespaceOverlay) 一路發展到 C (#60 RunProperties)。Backward compatible — 保留 fontName field,mirror rFonts.ascii。v3.13.13 CRITICAL HOTFIX (sub-stack B-CONT-2-CONT) reverted v3.13.12 的 TIER-0 over-fix。v3.13.12 (DO NOT USE — 刪除 內容)。v3.13.11 sub-stack B-CONT。基於 ooxml-swift v0.20.0。", + "author": { + "name": "Che Cheng" + }, + "license": "MIT", + "keywords": [ + "mcp", + "word", + "docx", + "ooxml", + "document", + "swift" + ] +} \ No newline at end of file diff --git a/plugins/che-word-mcp/.mcp.json b/plugins/che-word-mcp/.mcp.json new file mode 100644 index 0000000..ad4f7de --- /dev/null +++ 
b/plugins/che-word-mcp/.mcp.json @@ -0,0 +1,7 @@ +{ + "word": { + "type": "stdio", + "command": "${CLAUDE_PLUGIN_ROOT}/bin/che-word-mcp-wrapper.sh", + "description": "Word (.docx) MCP server — Swift-native OOXML manipulation, 218+ tools. Read/write paragraphs, runs, tables, hyperlinks, headers/footers, sections, styles, numbering, content controls (SDT), comments, footnotes/endnotes, equations, fields. v3.12.0: programmatic Track Changes (insert_text_as_revision / delete_text_as_revision / move_text_as_revision + as_revision flag on format_text / set_paragraph_format). Built on ooxml-swift v0.18.0. Office.js OOXML Roadmap P0 100% complete (PsychQuant/che-word-mcp#43)." + } +} diff --git a/plugins/che-word-mcp/CHANGELOG.md b/plugins/che-word-mcp/CHANGELOG.md new file mode 100644 index 0000000..3339897 --- /dev/null +++ b/plugins/che-word-mcp/CHANGELOG.md @@ -0,0 +1,236 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +> ⚠ This file was bootstrapped by `changelog-tools:changelog-init` from the +> `plugin.json` description field. Section categorization is best-effort — +> review and refine `Added` / `Changed` / `Fixed` etc. as needed. + +## [Unreleased] + +## [3.20.0] - 2026-05-04 + +### Added + +- **Cross-document OMath splice MCP tools** ([#160](https://github.com/PsychQuant/che-word-mcp/issues/160)) — two new MCP tools wrapping [PsychQuant/ooxml-swift v0.24.0's `spliceOMath` API](https://github.com/PsychQuant/ooxml-swift/issues/57): + - `splice_omath_from_source` — single-OMath low-level splice between two documents. Source via `source_path` (Direct mode read-only) or `source_doc_id` (Session); target requires session-mode `doc_id`. Position via `atStart` / `atEnd` / `afterText` / `beforeText` with optional `anchor` + `instance`. 
`omath_index` 0-based, joint document-order across both source carriers. `rpr_mode`: `full` (default verbatim) / `omathOnly` (whitelist rFonts/sz/lang/bold/italic) / `discard` (empty). `namespace_policy`: `lenient` (default — accepts `mml:` vs `m:` prefix mismatch with same URI per ECMA-376) / `strict` (any mismatch throws). + - `splice_paragraph_omath_from_source` — paragraph-level batch convenience. Splices all OMath blocks from source paragraph in source-document order via auto-derived ~10-char context anchors. Returns count or `contextAnchorNotFound(omath_index, snippet)` with partial-success state. + +### Changed + +- Bumps `ooxml-swift` dep `0.21.0` → `0.24.0` (required for `spliceOMath` API). + +### Tests + +- `Issue160SpliceOMathFromSourceTests` — 8 cases covering Direct/Session source modes, atEnd / afterText positions, error taxonomy, batch mode, rPr discard. +- Full suite: 297 tests, 0 failures, 9 pre-existing skips. + +### Use case + +Unblocks [kiki830621/collaboration_guo_analysis#17](https://github.com/kiki830621/collaboration_guo_analysis/issues/17) Phase 7 inline-math restoration — 522 inline OMath blocks restored from `_raw.docx` into `碩士論文-rescue-swift-v317.docx` (95.5% paragraph coverage in first pass; 100% after suffix-anchor fallback in [collaboration_guo_analysis@43465a6](https://github.com/kiki830621/collaboration_guo_analysis/commit/43465a6)). + +## [3.19.0] - 2026-05-04 + +### Fixed — `find_inline_math_gaps` caption detection ([#136](https://github.com/PsychQuant/che-word-mcp/issues/136)) + +Two-layer detection. **Layer 1**: `Paragraph.style?.lowercased().contains("caption")` — Word's canonical OOXML signal. **Layer 2**: 7 English prefixes (Table / Figure / Tab. / Fig. / Listing) + 9 CJK prefixes (incl. U+3000 ideographic space) + CJK no-separator regex `^[表圖图]\d` + label-only `表格`/`图表`. Digit-after-prefix guard rejects body sentences like "Table reservations are required...". 
`isLikelyTableCaption(_:)` signature changed `String → Paragraph`; private → internal for `@testable`. PR [#157](https://github.com/PsychQuant/che-word-mcp/pull/157), 17 new tests. + +### Fixed — `estimate_paragraph_for_page` structural weights v2 ([#142](https://github.com/PsychQuant/che-word-mcp/issues/142)) + +Walker upgrade: `getParagraphs()` → `collectStructuralBlocks()` returning `[StructuralBlock]` enum (`.paragraph` / `.table` / `.imageOnlyParagraph(drawingCount:)` / `.displayEquationParagraph`). Per-block weights (12pt thesis calibration): table = `tableRows × avgCellChars` (200/row fallback), image = +200 chars/drawing, display eq = 120 chars. New `structural_breakdown` field with 9 sub-fields. `method` field bumped `char_count_heuristic` → `char_count_heuristic_v2`. ~95× thesis figure-counting accuracy improvement. PR [#158](https://github.com/PsychQuant/che-word-mcp/pull/158), 6 new tests + #89 backward-compat (text-only fixtures unchanged). + +### Backward compat preserved + +- `getParagraphs()` UNCHANGED — 30+ other callers unaffected +- `paragraph_count` semantics: body-paragraph blocks only (paragraph_index math invariant preserved) +- `chars_per_page` override path UNCHANGED +- Existing #89 / #94 tests pass; only `Issue89...Tests:29` `method` literal updated to `_v2` + +### Verified + +- `swift build`: clean (only pre-existing deprecation warnings) +- `swift test`: 289 pass, 9 pre-existing skips, 0 failures +- 1 P3 follow-up filed: [#159](https://github.com/PsychQuant/che-word-mcp/issues/159) (display-eq fixture limitation, non-blocking) + +## [3.17.8] - 2026-05-01 + +### Changed +- Closes PsychQuant/che-word-mcp#98 — insert_equation MCP handler refactored across 3 commits (91506f8/357cbe7/339ab77). +- **BREAKING:** Inline-mode callers: pre-fix inserted a NEW paragraph; post-fix appends the OMML run to the EXISTING paragraph at paragraph_index. +- Suite: 236 → 243 tests, 0 failures, 9 pre-existing skips. 
+- **BREAKING (inline mode only):** all other call paths remain backward compatible; the inline-mode change is documented in this entry + +### Deprecated +- Pre-fix, the handler had three structural defects: (1) silent-clamp on out-of-range paragraph_index via non-throwing insertParagraph(_:at: Int) — tool reported success but inserted at wrong location; (2) lib's #84/#91 InsertLocation overload + InsertLocationError.inlineModeRequiresParagraphIndex / .invalidParagraphIndex(Int) structured errors never reached MCP callers (handler self-built OMML and bypassed lib); (3) inline mode (display_mode=false) always created a NEW Paragraph(runs: [eqRun]) instead of appending OMML run to the EXISTING paragraph at paragraph_index. v1 (91506f8) tried delegating to lib but Codex 6-AI verify caught P1 regression: lib's Document.insertEquation overload internally uses @available(*, deprecated, ...) MathEquation flat output (Field.swift:301) — emits truncated '(a)/(b' AND nested invalid OOXML. v2 (357cbe7) unified handler — both latex AND components paths use MathComponent.toOMML() for structurally correct OMML; display mode routes through lib's throwing insertParagraph(_:at: InsertLocation); inline mode handler-side appends OMML run to existing paragraph. v3 (339ab77) restored eqPara.properties.alignment = .center for display mode. +- Migration: use display_mode=true for 'new paragraph with equation' or call insert_paragraph + insert_equation separately. 7 NEW Issue98InsertEquationLibBypassTests pin contracts (5 RED→GREEN scenarios + 2 quality regression tests including unzip -p document.xml verifying structure + centering survives + deprecated '(a)/(b)' pattern absent). 6-AI verify ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) caught both P1 regressions in v1; Codex sanity check on v2 caught the centering P2. 6 P2/P3 follow-up issues filed (#105-#110).
+ +## [3.17.7] - 2026-05-01 + +### Added +- Bilateral mirror coverage for direct-child OMML at 4 wrapper positions ( direct child for Pandoc display math / direct child / direct child / nested wrapper combos), plus 2 NEW library-wide spec capabilities. +- Spectra change flatten-replace-omml-bilateral-coverage. 2 NEW spec capabilities promoted to openspec/specs/: ooxml-paragraph-text-mirror (mirror invariant + ReplaceResult informative refusal contract) + ooxml-library-design-principles (Correctness primacy + Human-like operations as foundational normative invariants for all ooxml-swift mutators). +- Write side: NEW public API WordDocument.replaceTextWithBoundaryDetection returns ReplaceResult enum (.replaced(count:) / .refusedDueToOMMLBoundary(occurrences:) / .mixed(replacedCount:, refusedOccurrences:)) with Occurrence(matchSpan:, ommlSpans:) carrying flattened-text coordinates. + +### Changed +- ooxml-swift dep bump 0.21.10 → 0.21.11 — closes 5-issue cluster PsychQuant/che-word-mcp #99 + #100 + #101 + #102 + #103. +- Read side: flattenedDisplayText walks direct-child OMML at all 4 wrapper positions with source-XML position ordering. +- Mirror invariant — asymmetric by design: reads include OMML visibleText (anchor lookup universe extends to math); writes treat OMML as opaque structural units (refuse cross-OMML mutation rather than silently delete equations). +- Decision 4 raw passthrough preserved: direct-child OMML stays in Paragraph.unrecognizedChildren / HyperlinkChild.rawXML(_) / AlternateContent.rawXML — no parser change, no writer change, round-trip fidelity unaffected. +- MCP impact: replace_text and other anchor-lookup tools now find paragraphs containing direct-child OMML at all 4 wrapper positions; existing replace_text MCP tool unchanged (backward-compatible). +- Tests: 236 passing che-word-mcp / 813→829 ooxml-swift (+16 in Issue99FlattenReplaceOMMLBilateralTests). 
+ +### Fixed +- Pre-fix Paragraph.flattenedDisplayText AND Document.replaceInParagraphSurfaces shared a symmetric blind spot: direct-child `<m:oMath>` / `<m:oMathPara>` (not wrapped in `<w:r>`) was silently dropped → anchor lookups against paragraphs containing display math silently 0-matched. +- Backward compatible — strict superset of pre-fix behavior + +## [3.17.6] - 2026-04-30 + +### Changed +- ooxml-swift dep bump 0.21.9 → 0.21.10 — closes PsychQuant/che-word-mcp#104. +- MCP impact: update_all_fields now finds and updates SEQ fields in canonical 5-run form (post-roundtrip / native Word emission); list_captions benefits transitively via shared FieldParser. +- Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) — 4 PASS / 2 WARN / 0 BLOCK; production reproducer rescue-swift-v317.docx via DocxReader path confirmed working. 5 P3 follow-ups filed in ooxml-swift (#29 SEQ Table coverage / #30 multi-paragraph counter / #31 multi-SEQ same paragraph / #32 DoS hardening / #33 discriminator invariant). +- Tests: 236 passing che-word-mcp / 809→813 ooxml-swift (+4 sub-tests in Issue104FieldParserCanonicalFormTests). + +### Fixed +- Form-level FieldParser canonical 5-run fldChar fix (orthogonal to v3.17.5's #94 container-level fix). +- Pre-fix update_all_fields returned silent no-op ('no SEQ fields found') on docs containing valid SEQ paragraphs at body top level when fldChar block was emitted in canonical 5-run form (each `<w:fldChar>` / `<w:instrText>` / `<w:t>` in its own sibling `<w:r>` — what DocxReader produces post-roundtrip and what native Word always emits). +- Pre-fix worked only on in-memory wrap_caption_seq output before save.
+- Two ooxml-swift commits land via this dep bump: 537de62 FieldParser two-phase parse (Phase-1 baked form + Phase-2 parseFiveRunSpan state machine probing both Run.rawXML and Run.rawElements per recognizedRunChildren = ['rPr','t','drawing','oMath','oMathPara'] allowlist) + 58fe4f9 P1 sub-fix surfaced by 6-AI verify (Logic + Devil's Advocate runtime test): canonical-branch Run.text rewrite was silently overridden by Run.toXML() rawXML short-circuit; new rewriteCanonicalCachedText helper splices new value into embedded while preserving + xml:space=preserve, AND keeps Run.text in sync. +- Backward compatible — strict superset of pre-fix behavior + +## [3.17.5] - 2026-04-30 + +### Changed +- Known incompleteness (3 follow-ups filed): ooxml-swift#25 header/footer/footnote/endnote SEQ scans still flat .paragraphs view; ooxml-swift#26 FieldParser.parse(paragraph:) misses inline SDT/hyperlink/fieldSimple/alternateContent surfaces; ooxml-swift#27 verify-with-user-fixture for real thesis docx roundtrip; plus ooxml-swift#28 refactor candidate (extract BodyChildVisitor protocol). +- Verified by 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh). +- Tests: 236 passing che-word-mcp / 805→809 ooxml-swift. +- Backward compatible except #87 documented behavior change + +### Removed +- ooxml-swift dep bump 0.21.8 → 0.21.9 — triple #87 + #93 + #94 release. #87 (Comment.paragraphIndex flat-counter, observable behavior change): list_comments paragraph_index now consistently 0-indexed against get_paragraphs() flat list; pre-fix off-by-N for any docx with non-paragraph BodyChild siblings before commented paragraph; callers manually compensating with paragraph_index - 1 must remove compensation. 
#93 (wrap_caption_seq SEQ inherits source position, caption visual fix): pre-fix 「圖 4-1:xxx」 became 「圖 4-:xxx1」 because new SEQ run had position=nil while source-loaded preText/postText had position>0; one-line fix seqRun.position = preRun.position; insert_bookmark=true × source-loaded paragraph still has same gap (filed PsychQuant/ooxml-swift#24, default insert_bookmark=false unaffected). #94 (update_all_fields traverses .table and .contentControl containers): pre-fix body loop only processed top-level .paragraph BodyChild, silently skipped .table and .contentControl(_, children:) — SEQ fields inside table cells/block-level SDTs never updated, returning 'no SEQ fields found' for thesis docs (caption paragraphs commonly live inside the table they describe); same gap #68 closed for findBodyChildContainingText; new walkAndProcessBodyChildForFields recursive walker mirrors #68 pattern; heading-count semantics: only top-level direct .paragraph body children count toward chapter-reset. + +## [3.17.4] - 2026-04-30 + +### Changed +- paired #91 + #92 release + +## [3.17.3] - 2026-04-29 + +### Added +- Result is exactly what insert_paragraph / insert_image_from_path / insert_caption etc. tools see when resolving after_text / before_text anchors. 10 new public-API surface tests in Issue86PublicAnchorLookupTests pin the canonical behavior across releases. + +### Changed +- bump ooxml-swift dep 0.21.6 → 0.21.7 (pure transitive bump, no MCP source changes; exposes public anchor lookup API). +- Three WordDocument APIs upgraded private → public (PsychQuant/che-word-mcp#86): findBodyChildContainingText(_:nthInstance:) instance method + bodyChildContainsText(_:needle:) static + tableContainsText(_:needle:) static. 
+- External Swift SPM consumers (rescue scripts, dxedit CLI, third-party tooling) can now call canonical anchor-lookup logic directly instead of reimplementing with diverging semantics (some skipped .contentControl recursion, some skipped table cell traversal pre-#68, some used different nthInstance counting rules). +- Backward compatible: pure additive private → public visibility change; no API removals, no behavioral changes for existing callers + +## [3.17.2] - 2026-04-29 + +### Added +- bump ooxml-swift dep 0.21.2 → 0.21.6 (pure transitive bump, no MCP source changes; 4 ooxml-swift releases worth of hardening + new APIs land transparently). v0.21.3 (XML hardening, PsychQuant/ooxml-swift#7): DTD reject + 64KB attr-value cap + SAX-based root-element attribute parsing + name whitelist on emit. +- New XMLHardeningError throws on malicious .docx input. v0.21.4 (roundtrip loud-fail, PsychQuant/ooxml-swift#6): AlternateContent.fallbackRunsModified dirty flag throws RoundtripError.unserializedFallbackEdit on stale fallbackRuns mutation. + +### Changed +- Position field cascade Int = 0 → Int? = nil across 13 typed-child models. xml:space=preserve autosense in Run.toXMLThrowing emit. +- Plus 3 unreleased docs commits on ooxml-swift main (PsychQuant/ooxml-swift#14 #15 #17 + corrective cd841e7) covering needsPPr emit-gate ↔ Issue4 lock-in test bidirectional reference, parseRun vs parseParagraph walker pattern divergence rationale, foreign-namespace pPr asymmetry documentation. +- Tests: 236 passing (no regressions). + +### Deprecated +- Run.commentIds @available deprecated; migrate to commentRangeMarkers. v0.21.5 (insertEquation flexibility, PsychQuant/che-word-mcp#84 #85): InsertLocation overload for Document.insertEquation + flattenedDisplayText OMML coverage extends anchor lookup beyond plain runs. v0.21.6 (mutation surface, PsychQuant/ooxml-swift#5): Hyperlink.text setter @available deprecated (lossy); migrate to .runs property. 
+- Backward compatible: deprecation warnings only; no API removals + +## [3.17.1] - 2026-04-29 + +### Changed +- bump ooxml-swift dep to v0.21.2 — pulls in pPr regression-guard + test infrastructure hardening from upstream (ooxml-swift#4 walker whitelist + #if DEBUG assert / ooxml-swift#13 empty self-closing test gap / ooxml-swift#16 countPPrOpenTags regex hardening excluding ). +- No public MCP tool change + +## [3.17.0] - 2026-04-28 + +### Added +- Tests: 5 new sub-tests in Issue62WrapCaptionSeqTests covering Scenarios 1-5. + +### Changed +- wrap_caption_seq MCP tool (Refs #62): Phase 2 of cross-repo work — exposes ooxml-swift v0.21.0 lib API as MCP tool. +- Bulk-wraps plain-text caption number portions in SEQ field runs across body paragraphs whose flattened text matches a regex (EXACTLY ONE numeric capture group). +- Captured digit becomes SEQ field cachedResult so Word's first-open render preserves user-typed numbering before F9. +- Rescues docs pasted from external sources (LaTeX-converted Word, Google Docs, Pandoc) so insert_table_of_figures / insert_table_of_tables produce populated TOFs. +- Idempotent: paragraphs already wrapping a SEQ field for sequence_name reported in skipped, never double-wrapped (detection covers both FieldSimple AND rawXML fldChar emissions). +- Phase 1 ships scope:body only (recurses into table cells + nestedTables + block-level SDT children); scope:all returns Error: scope_not_implemented for now (cross-container path lands in v3.17.x). +- Bookmark wrap opt-in (insert_bookmark + bookmark_template with literal ${number}) so default 23-caption rescue does NOT pollute list_bookmarks. +- Returns JSON: {matched_paragraphs, fields_inserted, paragraphs_modified:[idx,...], skipped:[{paragraph_index, reason},...]}. +- All preconditions checked BEFORE document mutation (regex compile + capture-group count + format/scope enums + bookmark_template invariant + doc_id opened). +- Suite 231 → 236 (+5, 0 fail / 9 skip). 
+- No ooxml-swift dep bump (still v0.21.0 from v3.16.2) + +## [3.16.2] - 2026-04-28 + +### Changed +- ooxml-swift dep bump 0.20.5 → 0.21.0 (Refs #62 #68): pure dep bump, no MCP source changes. +- MCP impact — insert_paragraph / insert_image_from_path / insert_equation / insert_caption calls using before_text / after_text now succeed when anchor text lives inside a table cell or block-level SDT (common in thesis docs with figure/table captions inside table cells). +- Returned position is top-level body.children index of the containing structure. +- Use into_table_cell for inside-cell inserts. +- Empty-needle guard: passing before_text:'' / after_text:'' now returns textNotFound instead of silently inserting at index 1. #62 (ooxml-swift v0.21.0): WordDocument.wrapCaptionSequenceFields(...) is now linked into the binary. +- Not yet exposed as an MCP tool — the wrap_caption_seq MCP wrapper ships in v3.17.0 (Phase 2 of the cross-repo work). +- Existing MCP tools unaffected. +- Suite: 231 → 231 (0 fail / 9 skip) + +### Fixed +- Picks up two ooxml-swift fixes that surface transparently via existing tool dispatch. #68 (ooxml-swift v0.20.6): InsertLocation.findBodyChildContainingText now traverses .table (rows × cells × paragraphs + nestedTables) and .contentControl(_, children:) (recursive). + +## [3.16.1] - 2026-04-28 + +### Added +- New static toolAnchorWhitelists dict (single source of truth, keyed by MCP tool name → accepted anchor list) + new detectPresentAnchors(_:tool:) overload. 4 conflict-detection call sites switched from literal anchor arrays to (tool:) lookup. 4 new invariant/parity tests. + +### Changed +- anchorPresence whitelist drift prevention (Refs #80): pure refactor, no runtime behavior change. +- Suite 227 → 231 (+4). +- Old (args, anchors:) overload preserved. 
+- Out-of-scope follow-ups: schema descriptions + dispatcher if-else chains still hardcode anchor names (pre-existing surfaces, not introduced by this PR) + +## [3.16.0] - 2026-04-28 + +### Added +- New static helper detectPresentAnchors with per-anchor type-aware predicates (null and wrong-type values do NOT count). #72 (validation) explicit text_instance ≤ 0 rejected — `Error: <tool>: text_instance must be ≥ 1, got <value>.` Omitted text_instance still defaults to 1. #70 (DX) all 32 'return Error:' lines in 4 #61-target tools rewritten as `Error: <tool>: <message>` for AI-caller error attribution. throw WordError.* paths unchanged. + +### Changed +- **BREAKING:** Bundle B anchor DX consistency (Refs #70 #71 #72): BREAKING (input validation only) — three coordinated MCP-layer changes across the 4 #61-target tools. #71 (behavior) silent priority on conflicting anchors → structured error: insert_paragraph(after_text + index) was previously silent-priority; now returns 'Error: insert_paragraph: received conflicting anchors: after_text + index. Specify exactly one.' +- Scope deliberately limited to 4 tools; remaining 41 return Error lines elsewhere deferred to error-prefix-sweep follow-up. +- Tests: 201 → 227 (+26 sub-tests, 0 fail / 9 skip). +- No ooxml-swift dep bump (still v0.20.5) + +### Removed +- SemVer rationale (minor not major): no schema break, no tool removal, restricting previously-undefined behavior.
#77 (docs) insert_caption anchor set wording precision in CHANGELOG / manifest / marketplace.json / plugin.json — was 'its own anchor set including after_table_index' (implies disjoint), now 'shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell' (explicit shared/adds/lacks). #78 (test) extends #69 append-index regression pin to bookmarkMarker / rawBlockElement / block-level contentControl body-children — the table case alone wouldn't catch a regression to getParagraphs().count - 1 that breaks for SDT / TOC bookmark / vendor extensions. #79 (test) adds round-trip depth: testInsertParagraphAppendIndexRoundTripsForInsertCalls demonstrates insert-family round-trip works (append + insert(N+1) + verify ordering); testInsertParagraphAppendIndexCannotRoundTripToUpdate pins the cross-family trade-off (update_paragraph(index=N) throws WordError.invalidIndex). + +### Changed +- Tests: 196 → 201 (+5 sub-tests, 0 fail / 9 skip). +- No production code change. +- No ooxml-swift dep bump (still v0.20.5) + +## [3.15.2] - 2026-04-28 + +### Changed +- Tests: 194 → 196 (0 fail / 9 skip). +- No behavior change in normal call paths. +- No ooxml-swift dep bump (still v0.20.5). 
+ +### Removed +- Word MCP Server - Swift 原生 OOXML 操作,233 個工具。v3.15.1 closes verify findings F1+F2+F3+F5 from v3.15.0 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh):F1 (P1) `after_image_id` anchor 加到 insert_paragraph + insert_equation (display only) + insert_image_from_path — lib InsertLocation.afterImageId 從 #44 起就 ready 但只有 insert_caption 暴露 MCP-layer;v3.15.0 inherited 這個 gap,本 release 補齊。F2 (P1) `into_table_cell` 加到 insert_equation (display only) — display equation 是新建 paragraph,cell 放置 well-defined;inline mode 拒絕。F3 (P2) equation 成功訊息加 anchor info('Inserted equation (display mode: true, after text X (instance N))' 等)— 關閉同 v3.14.4 LOOKUP 的 over-claim 模式(caller 之前無法區分 anchor 命中 vs append fallthrough)。F5 (P2) malformed `into_table_cell` partial dict(傳 `{table_index: 0}` 缺 row + col)silent fallthrough → 走 next anchor / append → 結果在錯位置且 caller 不知。改回 structured 'Error: into_table_cell requires all three fields',3 #61-target tools 同步修(cross-cutting consistency)。Anchor priority unified across all 3 #61-target insert tools (`insert_paragraph` / `insert_equation` / `insert_image_from_path`; `insert_caption` has its own anchor set):into_table_cell > after_image_id > after_text > before_text > index > append。Inline equation 拒絕擴大 — 現在拒絕所有 4 個 anchor params(before/after_text + after_image_id + into_table_cell),不只 v3.15.0 的 2 個。Tests: Issue61V315PointReleaseTests (9 sub-tests cross 3 tools)。Suite 185 → 194 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — 仍 v0.20.5(lib 從 #44 起就 ready)。Follow-up issues 另開:F4 inline equation 更通用設計 (e.g. 
into_paragraph_with_text) / F6 text anchor 擴及 table-cell paragraphs 與 block-level SDT / F7 getParagraphs().count - 1 message 在 doc 含 tables/SDTs 時 mis-report (pre-existing) / F8 error message 加 tool-prefix / F9 multiple anchor params 同時傳入 silent priority winner / F10 text_instance≤0 normalize。Backward compatible — schema additions optional,既有 v3.15.0 callers 不變;只有 malformed into_table_cell 從 silent fallthrough 改成 structured error(會被 buggy caller 注意到)+ equation message 加 suffix(substring 'Inserted equation' 仍存在)。v3.15.0 closes #61 — insert_paragraph 與 insert_equation 現在接受跟 insert_image_from_path 一致的 anchor 參數(after_text / before_text / text_instance / into_table_cell — into_table_cell 僅 insert_paragraph)。Pre-fix MCP 層 silently drop 這些參數 — JSON schema 接受但 handler dispatch 忽略,呼叫 fall through 到 legacy paragraph_index path 或 append at end。Lib API Document.insertParagraph(_: at: InsertLocation) 從 #44 起就支援所有六種 anchor cases(paragraphIndex / afterImageId / afterTableIndex / intoTableCell / afterText / beforeText),本 release 補齊 MCP 側 wire-up gap,無需 ooxml-swift dep bump(v0.20.5 已足夠)。Anchor priority mirror insert_image_from_path:into_table_cell > after_text > before_text > index > append。Errors(textNotFound / tableIndexOutOfRange / tableCellOutOfRange)回 structured 訊息而非 silent fallthrough — AI caller 能 surface failure 而非拿到位置錯誤的 misleading 'success'。**Inline equation explicit rejection**:insert_equation 在 display_mode=false(inline)時 explicitly 拒絕 after_text / before_text,回 structured error — 語意模糊('append OMML run into existing para containing this text' vs 'insert new para before/after target para'),inline placement 仍用 paragraph_index。Display-mode equation 建新 paragraph,anchor 語意明確。Tests: Issue61InsertParagraphAnchorsSmokeTests(5 sub-tests:after_text resolution / before_text resolution / text_instance disambiguation / into_table_cell append / textNotFound error)+ Issue61InsertEquationAnchorsSmokeTests(4 sub-tests:after_text + before_text in display mode / inline mode rejection / 
textNotFound error)。Suite 176 → 185 (0 fail / 9 pre-existing skips)。**No ooxml-swift dep bump** — v0.20.5 已有所有需要的 lib API。Backward compatible — anchor params 全 optional;既有 index / paragraph_index callers 不變;無 schema removal、無既有行為改動。**Real-world impact**:thesis-rescue / template-population workflow 不再需要 fall back 到「append at end + 手動 cut/paste in Word UI」或 binary-search 猜 paragraph_index,AI caller 對 3 #61-target insert tools(insert_image_from_path / insert_paragraph / insert_equation)對稱地用 surrounding context 定位 anchor。v3.14.5 closes Refs #63 verify F1 P1:擴充 findBodyChildContainingText 涵蓋所有 editable surfaces,補上 v3.14.4 CHANGELOG over-claim 的 insert anchor lookup gap。Pre-fix v3.14.4 只修了 REPLACE path(replace_text → Document.replaceInParagraphSurfaces 走 contentControls / hyperlinks / fieldSimples / alternateContents)但 LOOKUP path(findBodyChildContainingText 用於 InsertLocation.afterText / .beforeText 解析)只看 para.runs,所以 insert_image_from_path / insert_paragraph / insert_caption before_text/after_text 對 SDT-wrapped anchor 仍丟 textNotFound。Verify ensemble(5 Claude reviewers + Codex)的 requirements F1 P1 finding 抓到 CHANGELOG over-claim — 用戶選擇 Option B 擴充修而非縮 scope。ooxml-swift v0.20.5 新增 TextReplacementEngine.flatTextOfContentXML(read-only XML walker mirror replaceInContentXML flattening rules,跳過 / / nested subtrees)+ Paragraph.flattenedDisplayText 擴充 method 涵蓋 runs + hyperlinks + fieldSimples + alternateContents + contentControls(recursive into nested SDT children)。findBodyChildContainingText 改用 flattenedDisplayText 取代原本的 para.runs.map { $0.text }.joined()。新增 Issue63InsertAnchorInlineSDTTests(lib,3 wrappers × afterText/beforeText/insertImage = 3 sub-tests / 5 assertions)+ Issue63InsertAnchorInlineSDTSmokeTests(MCP,2 sub-tests pin lib-layer fix)。Suite 693 → 696 ooxml-swift / 174 → 176 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.5。Backward compatible — strict superset of pre-fix lookup behavior(找到更多 anchors,既有 plain runs anchor 仍照常運作)。**Insert anchor lookup gap 此 release 完整補齊**,所有 
inline wrappers 在 REPLACE + LOOKUP 兩個 path 都對稱覆蓋。v3.14.4 修 replace_text 對 inline `` content control 的 wrapper coverage gap(Refs #63):Document.replaceInParagraphSurfaces 之前覆蓋 paragraph.runs / hyperlinks / fieldSimples / alternateContents 但 **沒有** paragraph.contentControls — 包在 inline `` 裡的文字 silently 0-match。外部 converter(pandoc / Quarto / LaTeX→docx)習慣把 cross-ref placeholder([tab:foo] / [fig:bar] / [Smith 2020])包成 inline SDT,所以症狀跟 bracketed text 高度相關,但其實 **brackets 是 coincidence** — bracket-free needle 在 inline SDT 裡也 fail。Issue title「literal `[ ]` brackets」是誤導,差別測試(fldChar / fldSimple / hyperlink / inlineSDT 四個 inline wrapper × 四種 needle)證實只有 inline SDT case 失敗,其他三個 wrapper 從 v0.19.0+ #56 Phase 5 起就 typed-runs 覆蓋好了。Surgical fix architecture:ooxml-swift v0.20.4 新增 TextReplacementEngine.replaceInContentXML(XML DOM walker,wrap ContentControl.content 在 synthetic root xmlns:w,遍歷所有 `` descendants 在 document order,build flat string + offset map mirror flattenRuns invariant,run same literal/regex find logic,splice replacements 回 `` element string content;re-serialize wrapper children 去掉 wrapper tag)+ Document.replaceInContentControl(recursive helper 涵蓋 cc.content + cc.children 處理 nested SDT)。Wired 進 Document.replaceInParagraphSurfaces 接在 alternateContents loop 之後。設計上跳過:``(TC deletion text,不顯示)、``(field instruction code,不顯示)、nested `` subtrees(typed cc.children 由外層 recursion 處理避免 double-replacement)。Round-trip discipline:只 mutate `` element 的 string content;xml:space="preserve" 與其他 attribute 完整保留(attribute set 從不被 touch)。新增 Issue63InlineSDTReplaceTests(4 個 wrapper × 4 個 needle 的 differential test + nested SDT recursion + round-trip wrapper preservation = 3 sub-tests / 18 assertions)+ MCP-layer Issue63ReplaceTextInlineSDTSmokeTests(2 sub-tests pin lib-layer fix)。Suite 690 → 693 ooxml-swift / 172 → 174 che-word-mcp(0 fail)。基於 ooxml-swift v0.20.4。Backward compatible — surgical fix 只新增 code path,沒改任何既有行為(runs/hyperlinks/fieldSimples/alternateContents replacement path 
不動,ContentControl model 維持 raw XML storage 不重構)。Out-of-scope(separate follow-up):ContentControl 從 content:String 升級為 typed Run 列表(SDD-warranted refactor);smartTags / bidiOverrides / customXmlBlocks / unrecognizedChildren 維持 raw-carrier passthrough。v3.14.3 sub-stack E of paragraph-level content-equality (closes #66):Paragraph 新增 w14ParaId / w14TextId 欄位,提取並 round-trip opening tag 上的 w14:* 屬性(Word 用於 collaborative editing 和 comment threading 的 revision-tracking GUIDs)。Plain attribute passthrough,String? typing — Word 的 GUIDs 是 8-char hex tokens(NOT RFC 4122 UUIDs),所以 opaque-string round-trip 是正確選擇。Pre-fix v3.14.2 silently dropped 兩個 attributes — 佔了 NTPU 論文 fixture w14:* token loss 的 ~95%(2214 / 2359 lost tokens 是這兩個 attrs)。Post-E 量測:w14: 保留率 10.55% → 93.98%;document.xml 流失 10.95% → 8.02%。Combined with sub-stack D (#65), total impact since v3.14.1: 50% → 98.89% (D)、w14:* 5% → 93.98% (E)、document.xml 流失 16.66% → 8.02% (D+E, -8.64 pp)。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(w14: 0.04→0.90、sizeLossRatio 上限 0.12→0.10)— matrix-pin 現在 LOAD-BEARING across **5 preservation classes**(rFonts/noProof/lang/kern/w14:)spanning run-level + paragraph-level + paragraph-mark scope。Defensive design (R2 review fixes):openingPTag() routes attributes through escapeXMLAttribute;parseParagraph rejects schema-invalid empty-string GUIDs。基於 ooxml-swift v0.20.3。Backward compatible(兩個 fields 都 optional、default nil;openingPTag empty-attrs gate 防止 synthetic emit)。剩餘 8% 流失主要是其他 w14:* attribute classes(如 w14:* on )— tracked as separate follow-up SDD。v3.14.2 sub-stack D of paragraph-level content-equality (closes #65):ParagraphProperties 新增 markRunProperties 欄位,提取並 round-trip direct child of — paragraph-mark formatting per ECMA-376 §17.3.1.27 CT_PPrBase(控制 pilcrow ¶ 字符外觀的字型/顏色/語言/字距)。Reuses parseRunProperties verbatim — schema 跟 run-level CT_RPr 一致,所以 sub-stack C 的 typed extraction(rFonts 4-axis / noProof / kern / lang 3-axis)和 rawChildren passthrough(w14:* 效果)全部免費繼承。NTPU 論文 fixture 
量測影響: 保留率 50% → 98.89%; 88% → 98.77%; 92% → 100%; 84% → 99.93%;document.xml 大小流失 16.66% → 10.95%。Matrix-pin testDocumentContentEqualityInvariant 同步抬升 floor(lang 0.45→0.95、rFonts/noProof/kern 0.95、sizeLossRatio 上限 0.175→0.12)。Sub-stack E (#66 w14:paraId/textId) 接著 ship 到 v3.14.3,把流失壓到 < 5%,達成「edit 一個字 → document.xml shrinks <1%」strong demo。基於 ooxml-swift v0.20.2。Backward compatible(markRunProperties optional、default nil、writer empty-gate 防止 synthetic empty )。v3.14.1 sub-stack C-CONT closes triple-confirmed P0 (R2 + R5 + Codex 6-AI verify):recognizedRprChildren Set 列了 ~16+ rPr child kinds 為 'recognized' 但 parseRunProperties 沒有 typed extraction → silent drop。受影響的常見元素:(character spacing)、/(run shading)、(CJK emphasis marks)、///////。Fix:trim Set 到 ONLY actually-typed-extracted-or-emitted kinds。Round-trip size loss: pre-fix v3.13.x 32% → v3.14.0 17.75% → v3.14.1 16.66%。Methodology lesson (6th):P2 from one reviewer can become P0 when another applies real-world impact lens + +### Fixed +- closes Bundle A polish from #61 R2 verify (Refs #69 #73 #74 #75): #69 (bug) insert_paragraph append message reports body.children index instead of getParagraphs().count - 1 (mis-reported in docs with tables/SDTs by skipping table children); #74 (bug) insert_image_from_path debug log labels after_image_id correctly (was silently labeled 'index' since v3.15.1); #73 (test) regression pin for equation F5 partial-dict guard (existed since v3.15.1 but was untested); #75 (docs) clarifies '3 insert tools' wording — scope is the 3 #61-target tools (insert_paragraph / insert_equation / insert_image_from_path); insert_caption is a 4th insert tool with a partially-overlapping anchor set (shares after_image_id / after_text / before_text / paragraph_index, adds after_table_index + position, lacks into_table_cell), intentionally outside this unification scope. 
+ +## [3.14.0] - 2026-04-27 + +### Fixed +- closes #60(sub-stack C of #58/#59/#60)— RunProperties field-loss audit。Bump ooxml-swift v0.19.13→v0.20.0。新增 typed fields:4-axis rFonts (ascii/hAnsi/eastAsia/cs/hint — 之前被收斂成單一值)、noProof、kern、3-axis lang (val/eastAsia/bidi),加上 rawChildren passthrough 處理 unrecognized rPr children(如 w14:textOutline / w14:textFill / w14:glow)。**Pre-fix MCP 用戶看到 eastAsia/cs 字型(如 DFKai-SB 用於繁體中文)在 round-trip 時 silently 被替換成 ascii 值;v3.14.0 完整保留 4 個 axis**。Matrix-pin testDocumentContentEqualityInvariant 加上 preservation-class-3 ratio-floor assertions,現在 LOAD-BEARING — 任何未來 RunProperties regression 都會被 matrix-pin 抓到。Thesis fixture document.xml round-trip 大小:pre-fix 32% 損失 → post-sub-stack-C 17.75% 損失(改善 14.25 percentage points)。剩餘 17.75% 是 paragraph-mark rPr + w14:paraId/textId drops(separate out-of-scope follow-up SDD)。**'if not typed, preserve as raw' 原則架構性完成** — 從 sub-stack A (#58 BodyChild)、B (#59 WhitespaceOverlay) 一路發展到 C (#60 RunProperties)。Backward compatible — 保留 fontName field,mirror rFonts.ascii。v3.13.13 CRITICAL HOTFIX (sub-stack B-CONT-2-CONT) reverted v3.13.12 的 TIER-0 over-fix。v3.13.12 (DO NOT USE — 刪除 內容)。v3.13.11 sub-stack B-CONT。基於 ooxml-swift v0.20.0。 diff --git a/plugins/che-word-mcp/CLAUDE.md b/plugins/che-word-mcp/CLAUDE.md new file mode 100644 index 0000000..8402220 --- /dev/null +++ b/plugins/che-word-mcp/CLAUDE.md @@ -0,0 +1,107 @@ +# che-word-mcp — CLAUDE.md + +## Purpose + +Microsoft Word (.docx) MCP plugin. Wraps the [CheWordMCP](https://github.com/PsychQuant/che-word-mcp) Swift binary via auto-download wrapper. **Swift-native OOXML manipulation** — reads and writes .docx without requiring Microsoft Word installation. 218+ tools cover the full Office.js OOXML Roadmap P0 set ([#43](https://github.com/PsychQuant/che-word-mcp/issues/43) closed 100%). + +Built on [`ooxml-swift`](https://github.com/PsychQuant/ooxml-swift) v0.18.0. 
+ +## Components + +### MCP Tools (218+) + +| Category | Representative tools | +|----------|---------------------| +| Document lifecycle | `create_document`, `open_document`, `save_document`, `close_document`, `finalize_document`, `recover_from_autosave`, `checkpoint`, `revert_to_disk` | +| Properties / theme | `get_document_properties`, `set_theme`, `update_theme_color`, `update_theme_fonts`, `set_language` | +| Content (text + paragraphs) | `get_text`, `get_paragraphs`, `insert_paragraph`, `update_paragraph`, `replace_text`, `replace_text_batch`, `search_text_with_formatting`, `list_all_formatted_text` | +| Formatting | `format_text` (with `as_revision`), `set_paragraph_format` (with `as_revision`), `set_character_spacing`, `set_text_effect`, `set_paragraph_border`, `set_paragraph_shading` | +| Styles (v3.10) | `list_styles`, `apply_style`, `create_style`, `update_style`, `delete_style` | +| Numbering / lists (v3.10) | `insert_bullet_list`, `insert_numbered_list`, `set_list_level`, `set_outline_level` | +| Sections / page setup (v3.10) | `get_section_properties`, `insert_section_break`, `set_page_size`, `set_page_margins`, `set_page_orientation`, `set_columns`, `set_line_numbers` | +| Tables (v3.11) | `insert_table`, `update_cell`, `add_row_to_table`, `merge_cells`, `set_cell_vertical_alignment`, `set_table_style`, `set_header_row`, `set_table_alignment` | +| Hyperlinks (v3.11) | `insert_hyperlink`, `update_hyperlink`, `list_hyperlinks`, `insert_internal_link`, `insert_cross_reference` | +| Headers / footers (v3.11) | `add_header`, `update_header`, `list_headers`, `add_footer`, `insert_page_number` (even/odd + section header map) | +| Comments | `insert_comment`, `update_comment`, `reply_to_comment`, `resolve_comment`, `list_comment_threads`, `sync_extended_comments`, `add_person`, `list_people` | +| Track Changes — accept/reject | `enable_track_changes`, `disable_track_changes`, `get_revisions`, `accept_revision`, `reject_revision`, `accept_all_revisions`, 
`reject_all_revisions` | +| Track Changes — write side (v3.12) | `insert_text_as_revision`, `delete_text_as_revision`, `move_text_as_revision`, `format_text` / `set_paragraph_format` with `as_revision: true` | +| Content controls / SDT (v3.9) | `insert_content_control`, `list_content_controls`, `update_content_control_text`, `replace_content_control_content`, `insert_repeating_section`, `insert_checkbox`, `insert_dropdown` | +| Images | `insert_image`, `insert_floating_image`, `update_image`, `set_image_style`, `export_image`, `export_all_images`, `insert_drop_cap` | +| Footnotes / endnotes / equations / captions | `insert_footnote`, `insert_endnote`, `insert_equation`, `insert_caption`, `list_captions` | +| Bookmarks / TOC / watermarks | `insert_bookmark`, `insert_toc`, `insert_table_of_figures`, `insert_index`, `insert_watermark`, `insert_image_watermark` | +| Fields | `insert_date_field`, `insert_page_field`, `insert_sequence_field`, `insert_calculation_field`, `insert_if_field`, `insert_merge_field`, `update_all_fields` | +| Document protection | `protect_document`, `set_document_password`, `restrict_editing_region` | +| Compare / export | `compare_documents`, `compare_documents_markdown`, `export_text`, `export_markdown`, `export_revision_summary_markdown`, `export_comment_threads_markdown` | + +MCP namespace: `mcp__che-word-mcp__`. 
+ +### Skills + +| Skill | 用途 | +|-------|------| +| `che-word-mcp` | 工作流指南:Direct vs Session 模式、tool 分類、Track Changes 寫側合約(`as_revision` + `track_changes_not_enabled` 例外)、SDT 控件、author resolution chain、常見 workflow(contract redline、multi-author review、fillable form) | + +## Two Operating Modes + +| Mode | Param | Tools | Use when | +|------|-------|-------|----------| +| Direct | `source_path` | 18 | 快速 read-only 檢查(list/search/info),不需 open/close lifecycle | +| Session | `doc_id` | All 218+ | 任何寫入或多步驟編輯都要走這個 | + +## Track Changes Contract(v3.12.0+ 重要) + +`as_revision: true` 是 **per-call opt-in**,不會自動開啟 track changes: + +| 狀態 | `as_revision: true` 行為 | +|------|--------------------------| +| Track changes enabled | 包成 `` / `` / `` / `` 標記 | +| Track changes disabled | **拋出 `track_changes_not_enabled`**,不靜默 enable | + +設計理由:避免副作用 — 呼叫 `format_text(as_revision: true)` 不會偷偷修改文件全域的 track changes 狀態。要先 `enable_track_changes(author: "...")` 再呼叫。 + +**Author resolution chain**:explicit `author` arg → `revisions.settings.author`(在 `enable_track_changes` 時設定)→ `"Unknown"`。 + +## Binary Dependency + +這是 binary-based plugin:`.mcp.json` 指向 `bin/che-word-mcp-wrapper.sh`,wrapper 會 auto-download `CheWordMCP` binary 到 `~/bin/`。 + +- Binary repo: [`PsychQuant/che-word-mcp`](https://github.com/PsychQuant/che-word-mcp) +- Binary name: `CheWordMCP` +- Underlying lib: [`PsychQuant/ooxml-swift`](https://github.com/PsychQuant/ooxml-swift) v0.18.0 +- Release asset naming: asset filename must contain `CheWordMCP` + +### Plugin vs Binary Version Sync + +| 改動類型 | 處理 | +|----------|------| +| 改 plugin shell(skill、CLAUDE.md、wrapper、`.mcp.json`) | `/plugin-tools:plugin-update che-word-mcp` | +| 改 binary source(新 tool、bug fix、ooxml-swift 升級) | 先 `/mcp-tools:mcp-deploy`(在 `mcp/che-word-mcp/`)→ 發 GitHub Release → 再跑 `plugin-update` | +| 同時改兩邊 | `plugin-update`(v1.11+ 會 detect 依賴不同步並 prompt 連動 mcp-deploy) | + +Plugin shell 與 binary 版本獨立。Plugin shell 升 minor 反映文件/skill/CLAUDE.md 變動;binary 版升反映 MCP 
server 內部新增 tool 或修 bug。 + +## Permissions + +無 macOS TCC 權限需求。plugin 跑在使用者層級,讀寫 `.docx` 檔案使用標準檔案系統權限(會繼承呼叫者的 sandbox / FDA 設定)。 + +## Development + +- Update after plugin-shell changes: `/plugin-tools:plugin-update che-word-mcp` +- Full release (binary + plugin): `/plugin-tools:plugin-deploy che-word-mcp` +- Binary source edits: go to `mcp/che-word-mcp/` (or sibling clone of `PsychQuant/che-word-mcp`) then `/mcp-tools:mcp-deploy` +- Health check: `/plugin-tools:plugin-health` + +## Office.js OOXML Roadmap P0 Closure Map + +| § | Sub-issue | che-word-mcp version | +|---|-----------|----------------------| +| §1 Content Controls (SDT) | [#44](https://github.com/PsychQuant/che-word-mcp/issues/44) | v3.9.0 | +| §2 Track Changes 寫側 | [#45](https://github.com/PsychQuant/che-word-mcp/issues/45) | v3.12.0 | +| §3 Numbering | [#46](https://github.com/PsychQuant/che-word-mcp/issues/46) | v3.10.0 | +| §4 Sections | [#47](https://github.com/PsychQuant/che-word-mcp/issues/47) | v3.10.0 | +| §8 Styles | [#48](https://github.com/PsychQuant/che-word-mcp/issues/48) | v3.10.0 | +| §9 Tables | [#49](https://github.com/PsychQuant/che-word-mcp/issues/49) | v3.11.0 | +| §14 Hyperlinks | [#50](https://github.com/PsychQuant/che-word-mcp/issues/50) | v3.11.0 | +| §16 Headers / Footers | [#51](https://github.com/PsychQuant/che-word-mcp/issues/51) | v3.11.0 | + +Umbrella: [#43](https://github.com/PsychQuant/che-word-mcp/issues/43) — closed 2026-04-25. 
diff --git a/plugins/che-word-mcp/README.md b/plugins/che-word-mcp/README.md new file mode 100644 index 0000000..e3feba0 --- /dev/null +++ b/plugins/che-word-mcp/README.md @@ -0,0 +1,442 @@ +# che-word-mcp + +**Word MCP Server** — Swift 原生 OOXML 操作,**235 個工具**,支援 Dual-Mode 存取 + preserve-by-default round-trip fidelity + programmatic Track Changes 生成 + `document.xml` lossless round-trip。 + +當前版本:**v3.20.0**(Plugin shell + Binary 同步)— closes [PsychQuant/che-word-mcp#160](https://github.com/PsychQuant/che-word-mcp/issues/160) — 兩個新 MCP tools 暴露 [PsychQuant/ooxml-swift v0.24.0](https://github.com/PsychQuant/ooxml-swift/releases/tag/v0.24.0) 的 `spliceOMath` API 給 MCP callers,跨 document 拷貝 verbatim `<m:oMath>` XML 區塊。 + +- **`splice_omath_from_source`** — 單一 OMath splice,low-level。Source 用 `source_path`(Direct mode 唯讀)或 `source_doc_id`(Session mode);target 必須是 session-mode `doc_id`。Position 支援 `atStart` / `atEnd` / `afterText` / `beforeText`(後兩者配合 `anchor` + 可選 `instance`)。`omath_index` 0-based、按 source-document order 跨 carrier 統一排序。`rpr_mode` 控制 source Run rPr 怎麼帶到 target Run(`full` 預設 verbatim / `omathOnly` 白名單 / `discard` 空 rPr);`namespace_policy` 控制 prefix vs URI 處理(`lenient` 預設接受 `mml:` vs `m:` prefix mismatch / `strict` 任何 prefix 不同就 throw)。回傳 `Spliced 1 OMath block (...)` 或 structured error。 +- **`splice_paragraph_omath_from_source`** — paragraph-level batch convenience。把 source paragraph 內所有 OMath 按 source-document order splice 到 target paragraph 對應位置(內部用 ~10 chars source-text-context 自動推 anchor)。回傳 splice 數量或 `contextAnchorNotFound(omath_index, snippet)`(partial-success state 已留在 target)。 + +**Use case**:unblocks [kiki830621/collaboration_guo_analysis](https://github.com/kiki830621/collaboration_guo_analysis/issues/17) Phase 7 inline-math restoration — 522 inline OMath blocks 從 `_raw.docx` 救回 `碩士論文-rescue-swift-v317.docx`。 + +**Tests**:8 個新 `Issue160SpliceOMathFromSourceTests` cases(Direct/Session source modes、atEnd/afterText positions、error taxonomy、batch mode、rPr
discard)。Full suite: 297 passing, 0 failures, 9 pre-existing skips。Bumps `ooxml-swift` dep `0.21.0` → `0.24.0`。 + +--- + +當前版本:**v3.19.0**(Plugin shell + Binary 同步)— closes [PsychQuant/che-word-mcp#105](https://github.com/PsychQuant/che-word-mcp/issues/105) + [#106](https://github.com/PsychQuant/che-word-mcp/issues/106) + [#107](https://github.com/PsychQuant/che-word-mcp/issues/107)(`insert_equation` argument-contract hardening from #98 verify follow-up)。**#106 (P2)** — reject `components`+`latex` 同時呼叫,handler 在解析前回 structured error(之前 silent 用 components、丟 latex 而不警示)。**#107 (P2)** — reject 非 boolean `display_mode`(agent caller 傳 `"false"` string 等),之前 `args["display_mode"]?.boolValue` 回 nil → `?? true` fall-back,inline-intent 呼叫被路由成 display-mode。**#105 (P3)** — `paragraph_index` schema 拆 display-mode(`body.children` 索引)vs inline-mode(top-level paragraph ordinal);新增 MCP-vs-lib `display_mode` 預設值差異說明(MCP 預設 `true` 對 agent 友好,lib 預設 `false`);移除 unreachable `inlineModeRequiresParagraphIndex` defensive arm。6 NEW `Issue98InsertEquationLibBypassTests` 釘住 contract(2 runtime errors + 4 source-grep contract tests)。6-AI verify ensemble(5 Claude reviewers + Codex gpt-5.5 xhigh):PASS with 0 blocking, 5 P2 follow-up, 6 P3 nits。6 follow-up issues opened:[#124](https://github.com/PsychQuant/che-word-mcp/issues/124) sibling fail-open(`paragraph_index` / `text_instance` / `into_table_cell.*` 同類 string-fail-open)/ [#125](https://github.com/PsychQuant/che-word-mcp/issues/125) JSON null vs absent semantics / [#126](https://github.com/PsychQuant/che-word-mcp/issues/126) runtime regression test for #105 with `[paragraph, table/SDT, paragraph]` fixture / [#127](https://github.com/PsychQuant/che-word-mcp/issues/127) `paragraphIndex!` force-unwrap → defensive guard / [#128](https://github.com/PsychQuant/che-word-mcp/issues/128) schema "anchor" phrasing 與 #71 anchor whitelist 對齊 / [#129](https://github.com/PsychQuant/che-word-mcp/issues/129) echo received value in arg-validation 
errors。Suite: 243 → 249 tests, 0 failures, 9 pre-existing skips。**Backward compatible** — only newly fail 在 caller 傳 `components`+`latex` 同時(之前 silent component-wins)或 string `display_mode`(之前 silent fail-open to true)—— 如果舊 caller 依賴這兩個 silent 行為,那個 call 本來就 buggy。 + +--- + +當前版本:**v3.17.8**(Plugin shell + Binary 同步)— closes [PsychQuant/che-word-mcp#98](https://github.com/PsychQuant/che-word-mcp/issues/98),`insert_equation` MCP handler 跨 3 commits 重構(`91506f8` v1 / `357cbe7` v2 / `339ab77` v3)。Pre-fix 三個 structural defects:(1) **silent-clamp** on out-of-range `paragraph_index`(handler 用 non-throwing `insertParagraph(_:at: Int)` overload,tool 回 success 但插錯位置);(2) lib `#84`/`#91` 的 `InsertLocation` overload + `InsertLocationError.inlineModeRequiresParagraphIndex` / `.invalidParagraphIndex(Int)` structured errors **從未到達 MCP callers**(handler 自建 OMML 完全 bypass lib);(3) inline mode(`display_mode: false`)總是建立 NEW `Paragraph(runs: [eqRun])` 而**不是** append OMML run 到既有段落。v1 嘗試委派 lib 但 Codex 6-AI verify 抓到 P1 regression:lib 的 overload 內部用 `@available(*, deprecated, ...)` `MathEquation` flat output(`Field.swift:301`)— `\frac{a}{b}` 被截斷成 `(a)/(b` AND 嵌套 `` invalid OOXML。v2 unified handler — 兩條 path(`latex` + `components`)都用 `MathComponent.toOMML()` 建結構化 OMML;display mode 走 lib 的 throwing `insertParagraph(_:at: InsertLocation)`;inline mode handler-side append OMML run 到既有段落。v3 補回 `eqPara.properties.alignment = .center`(lib display-mode convention,Codex sanity check P2)。**BREAKING for inline mode callers**:pre-fix 插新段落 → post-fix append 到既有段落(matches lib `#84`/`#91` design);migration 見下方 v3.17.8 區塊。7 NEW `Issue98InsertEquationLibBypassTests` 釘住 contract(5 RED→GREEN scenarios + 2 quality regression tests 含 `unzip -p document.xml` 驗 `` 結構 + centering 存活 + deprecated `(a)/(b)` 不出現)。6-AI verify ensemble(5 Claude reviewers + Codex gpt-5.5 xhigh)抓到 v1 兩個 P1 regression;Codex sanity check 抓到 v2 centering P2。6 P2/P3 follow-ups filed 
[#105](https://github.com/PsychQuant/che-word-mcp/issues/105) [#106](https://github.com/PsychQuant/che-word-mcp/issues/106) [#107](https://github.com/PsychQuant/che-word-mcp/issues/107) [#108](https://github.com/PsychQuant/che-word-mcp/issues/108) [#109](https://github.com/PsychQuant/che-word-mcp/issues/109) [#110](https://github.com/PsychQuant/che-word-mcp/issues/110)。Tests: 236 → 243 passing, 0 failures, 9 pre-existing skips。Backward compatible **except inline-mode BREAKING (documented)**。 + +**v3.17.8 BREAKING migration guide** + +``` +// Pre-fix(會插入 NEW paragraph at index 5) +insert_equation(doc_id, latex: "x", display_mode: false, paragraph_index: 5) + +// Post-fix(append OMML run 到既有段落 5;matches lib #84/#91 design) +insert_equation(doc_id, latex: "x", display_mode: false, paragraph_index: 5) + +// 若需要舊行為(要 NEW paragraph):改用 display mode 或拆兩步 +insert_equation(doc_id, latex: "x", display_mode: true, paragraph_index: 5) // 新段落 + 公式 +// 或 +insert_paragraph(doc_id, paragraph_index: 5) +insert_equation(doc_id, latex: "x", display_mode: false, paragraph_index: 6) +``` + +Pre-fix 的 silent-clamp + 新段落行為是 **bug**,不是 backward-compat surface — v3.17.8 的 BREAKING 是修復結構錯誤。 + +--- + +當前版本:**v3.17.7**(Plugin shell + Binary 同步)— bump `ooxml-swift` dep 0.21.10 → 0.21.11,closes 5-issue cluster [#99](https://github.com/PsychQuant/che-word-mcp/issues/99) + [#100](https://github.com/PsychQuant/che-word-mcp/issues/100) + [#101](https://github.com/PsychQuant/che-word-mcp/issues/101) + [#102](https://github.com/PsychQuant/che-word-mcp/issues/102) + [#103](https://github.com/PsychQuant/che-word-mcp/issues/103).
**Bilateral mirror coverage** for direct-child OMML at 4 wrapper positions(`` direct child for Pandoc display math / `` direct child / `` direct child / nested wrapper combos)+ 2 NEW library-wide spec capabilities(`ooxml-paragraph-text-mirror` + `ooxml-library-design-principles`)。Pre-fix `Paragraph.flattenedDisplayText` AND `Document.replaceInParagraphSurfaces` 共享 symmetric blind spot:direct-child ``/``(沒有 `` wrapper)silently dropped → 對含 display math 的 paragraph anchor lookup silently 0-matched。讀側:`flattenedDisplayText` 走訪 4 wrapper positions 的 direct-child OMML,source XML position 排序。寫側:NEW 公開 API `WordDocument.replaceTextWithBoundaryDetection` 回 `ReplaceResult` enum(`.replaced(count:)` / `.refusedDueToOMMLBoundary(occurrences:)` / `.mixed(...)`),cross-OMML mutation 拒絕並回傳 informative `Occurrence(matchSpan:, ommlSpans:)`。Mirror invariant — **asymmetric by design**:reads include OMML visibleText(anchor lookup universe extends to math),writes treat OMML as opaque structural units(refuse cross-OMML mutation rather than silently delete equations)。Principle-driven(`ooxml-library-design-principles` spec):**Correctness primacy** + **Human-like operations** as foundational normative invariants for all `ooxml-swift` mutators。Decision 4 raw passthrough preserved — round-trip fidelity unaffected。Tests: 236 passing che-word-mcp / 813→829 ooxml-swift (+16 in `Issue99FlattenReplaceOMMLBilateralTests`)。Backward compatible — strict superset of pre-fix behavior。 + +**v3.17.7 bump `ooxml-swift` dep v0.21.10 → v0.21.11** — Bilateral mirror coverage for direct-child OMML(5-issue cluster #99-#103, Spectra change `flatten-replace-omml-bilateral-coverage`)。Pure transitive, no MCP source changes: + +- **Read side**: `flattenedDisplayText` walks direct-child OMML at all 4 wrapper positions +- **Write side**: NEW public API `WordDocument.replaceTextWithBoundaryDetection` returns informative `ReplaceResult` enum +- **Mirror invariant**: same surface coverage, asymmetric on OMML detected 
(reads include / writes refuse cross) +- **Library principles** (NEW spec capability): Correctness primacy + Human-like operations + +--- + +當前版本:**v3.17.6**(Plugin shell + Binary 同步)— bump `ooxml-swift` dep 0.21.9 → 0.21.10,closes [PsychQuant/che-word-mcp#104](https://github.com/PsychQuant/che-word-mcp/issues/104). **Form-level** FieldParser canonical 5-run fldChar fix(orthogonal to v3.17.5 的 #94 **container-level** fix — 兩者解 independent walker dimensions)。Pre-fix `update_all_fields` 在 docs containing valid SEQ paragraphs at body top level when fldChar block emitted in canonical 5-run form(每個 `` / `` / `` 各在獨立 `` sibling — DocxReader 後 / native Word emission)回 silent no-op (`"no SEQ fields found"`)。兩個 ooxml-swift commits land via dep bump:`537de62` FieldParser two-phase parse(Phase-1 baked + Phase-2 `parseFiveRunSpan` state machine probing both `Run.rawXML` and `Run.rawElements`)+ `58fe4f9` P1 sub-fix surfaced by 6-AI verify (Logic + Devil's Advocate runtime test): canonical-branch `Run.text` rewrite was silently overridden by `Run.toXML()` rawXML short-circuit; new `rewriteCanonicalCachedText` helper splices new value into embedded `` while preserving `` + `xml:space="preserve"`. **MCP impact**: `update_all_fields` now finds SEQ in canonical 5-run form (post-roundtrip / native Word emission); `list_captions` benefits transitively. Verified 6-AI ensemble (4 PASS / 2 WARN / 0 BLOCK)。5 P3 follow-ups filed in ooxml-swift(#29 SEQ Table coverage / #30 multi-paragraph counter / #31 multi-SEQ same paragraph / #32 DoS hardening / #33 discriminator invariant)。Tests: 236 passing che-word-mcp / 809→813 ooxml-swift (+4 sub-tests in `Issue104FieldParserCanonicalFormTests`)。Backward compatible — strict superset of pre-fix behavior。 + +**v3.17.6 bump `ooxml-swift` dep v0.21.9 → v0.21.10** — Form-level FieldParser fix closing [PsychQuant/che-word-mcp#104](https://github.com/PsychQuant/che-word-mcp/issues/104). 
Pure transitive, no MCP source changes: + +- **Form-level vs container-level distinction**: 此 fix 是 form-level(baked vs canonical fldChar emission),orthogonal to v3.17.5 的 #94 container-level fix(`.table` / `.contentControl` recursion)。兩者解 independent walker dimensions:v3.17.5 補 container 走訪、v3.17.6 補 fldChar form 偵測。Production reproducer `rescue-swift-v317.docx` 內 19 個 SEQ paragraphs(12 SEQ Figure + 7 SEQ Table)DocxReader 後是 canonical 5-run 形式,pre-v3.17.6 的 FieldParser 只看 baked form(v2.0.0 全 5 個 fldChar 元素塞同一 `Run.rawXML`)→ silent skip 整批。 +- **P1 sub-fix surfaced by 6-AI verify**: 第一輪 fix 只 mutate `Run.text`,但 `Run.toXML()` short-circuits on non-nil `rawXML`(`Run.swift:246-248`)— hand-built fixtures / native-Word-emit / 任何 upstream tool preserving raw form 都會 silent-broken。新 `rewriteCanonicalCachedText` helper 解決:splice 新值進 embedded `` 同時保留 `` 跟 `xml:space="preserve"`;`Run.text` 並行同步。 +- **5 P3 follow-ups filed (out of scope)**: ooxml-swift [#29](https://github.com/PsychQuant/ooxml-swift/issues/29) (SEQ Table identifier coverage) / [#30](https://github.com/PsychQuant/ooxml-swift/issues/30) (multi-paragraph counter accumulation) / [#31](https://github.com/PsychQuant/ooxml-swift/issues/31) (multiple SEQ in same paragraph) / [#32](https://github.com/PsychQuant/ooxml-swift/issues/32) (DoS hardening) / [#33](https://github.com/PsychQuant/ooxml-swift/issues/33) (discriminator invariant pin)。 + +**Tests**: 236 passing che-word-mcp / 809 → 813 ooxml-swift(+4 sub-tests)。Backward compatible — strict superset of pre-fix behavior。 + +**Verification**: 6-AI ensemble (5 Claude reviewers + Codex gpt-5.5 xhigh) — 4 PASS / 2 WARN / 0 BLOCK; production reproducer (rescue-swift-v317.docx via DocxReader path) confirmed working. 
+ +--- + +**v3.17.5 bump `ooxml-swift` dep v0.21.8 → v0.21.9** — triple lib-only patch release covering [PsychQuant/che-word-mcp#87](https://github.com/PsychQuant/che-word-mcp/issues/87) + [#93](https://github.com/PsychQuant/che-word-mcp/issues/93) + [#94](https://github.com/PsychQuant/che-word-mcp/issues/94). Pure transitive, no MCP source changes: + +- **#87 `Comment.paragraphIndex` flat-paragraph counter (observable behavior change)** — pre-fix used `body.children.enumerated()` index counting tables/SDTs/markers; post-fix walks flat-paragraph counter mirroring `getParagraphs()` semantics. MCP impact: `list_comments` `paragraph_index` field now consistently 0-indexed against `get_paragraphs()` flat list. **Caller migration**: callers manually compensating with `paragraph_index - 1` (or `- N`) to work around the bug must remove that compensation; otherwise they will over-correct. +- **#93 `wrap_caption_seq` SEQ inherits source `position`** — pre-fix caption like 「圖 4-1:xxx」 rendered as 「圖 4-:xxx1」 on real Word docs because SEQ run had `position=nil` while source-loaded preText/postText had `position>0`. Paragraph emit bifurcates by position field — positioned section vs legacy post-content section — so SEQ landed at end. One-line fix `seqRun.position = preRun.position`. **Known incompleteness**: `insert_bookmark=true` × source-loaded paragraph still has same gap on bookmarkStart/End markers (filed [PsychQuant/ooxml-swift#24](https://github.com/PsychQuant/ooxml-swift/issues/24)); default `insert_bookmark=false` unaffected. +- **#94 `update_all_fields` traverses `.table` and `.contentControl` containers** — pre-fix body loop only processed top-level `.paragraph` BodyChild — silently skipped `.table` and `.contentControl(_, children:)`. SEQ fields anchored inside table cells or block-level SDTs were never updated. New `walkAndProcessBodyChildForFields` recursive walker mirrors #68 pattern.
Heading-count semantics: only top-level direct `.paragraph` body children count toward chapter-reset (container-nested headings do NOT trigger SEQ resets — conservative thesis-aligned choice). **Known incompleteness** (filed): [ooxml-swift#25](https://github.com/PsychQuant/ooxml-swift/issues/25) header/footer/footnote/endnote SEQ scans still flat `.paragraphs` view; [ooxml-swift#26](https://github.com/PsychQuant/ooxml-swift/issues/26) `FieldParser.parse(paragraph:)` misses inline SDT/hyperlink/fieldSimple/AlternateContent surfaces; [ooxml-swift#27](https://github.com/PsychQuant/ooxml-swift/issues/27) verify-with-user-fixture for real thesis docx roundtrip; plus [ooxml-swift#28](https://github.com/PsychQuant/ooxml-swift/issues/28) refactor candidate (extract `BodyChildVisitor` protocol to dedupe 5+ body-children walkers). + +**Tests**: 236 passing che-word-mcp / 805 → 809 ooxml-swift (no new MCP tests, no regressions). Backward compatible **except #87 documented behavior change**. + +**Verification**: 6-AI ensemble (5 Claude reviewers: requirements / logic / security / regression / devils-advocate + Codex gpt-5.5 xhigh). + +--- + +**v3.17.4 bump `ooxml-swift` dep v0.21.7 → v0.21.8** — paired patch release covering [PsychQuant/che-word-mcp#91](https://github.com/PsychQuant/che-word-mcp/issues/91) + [#92](https://github.com/PsychQuant/che-word-mcp/issues/92), both lib-only post-#85 verify follow-ups. Pure transitive, no MCP source changes: + +- **#91 `insertEquation` inline-mode error semantics** — `InsertLocationError.inlineModeRequiresParagraphIndex` (anchor type other than `.paragraphIndex` with inline math) + `.invalidParagraphIndex(Int)` (negative or `≥ topLevelParagraphCount`). MCP impact: `insert_equation` `paragraph_index` out-of-bounds now returns structured error instead of silent fallthrough. 
+- **#92 `flattenedDisplayText` OMML wrapper coverage** — extracted `flattenRunsWithOMML(_:)` private static helper; applied uniformly to top-level runs + `hyperlinks[].runs` + `fieldSimples[].runs` + `alternateContents[].fallbackRuns`. `contentControls` path remains separate via `flattenContentControlText` (different recursion strategy, established in #63). MCP impact: anchor lookups (`replace_text` / `insert_paragraph` / `insert_image_from_path` / `insert_caption` `before_text` / `after_text`) against paragraphs containing inline math inside hyperlink / fldSimple / AC fallback wrappers now succeed where they previously silently 0-matched. + +**Tests**: 236 passing (no new MCP tests, no regressions). Backward compatible (strict superset of pre-fix lookup behavior). + +**Follow-ups filed (out of scope)**: 5 sibling bugs surfaced by devils-advocate verify of #92 — DA-1..DA-4 cover direct-child OMML in `<w:p>` / `<w:hyperlink>` / `<w:fldSimple>` + nested wrapper; DA-5 docstring qualifier. Filed as [#99](https://github.com/PsychQuant/che-word-mcp/issues/99)–[#103](https://github.com/PsychQuant/che-word-mcp/issues/103). NOT regressions introduced by v3.17.4. + +--- + +**v3.17.3 bump `ooxml-swift` dep v0.21.6 → v0.21.7** — patch release exposing public anchor lookup API ([che-word-mcp#86](https://github.com/PsychQuant/che-word-mcp/issues/86)). Pure transitive, no MCP source changes: + +- **`WordDocument.findBodyChildContainingText(_:nthInstance:)`** — instance method, was `private`. Walks `body.children` returning the top-level index of the BodyChild whose flattened text contains the needle. `nthInstance` disambiguates among repeats. Traverses `.contentControl(_, children:)` recursively + `.table` cells / `nestedTables` (since #68); skips `bookmarkMarker` / `rawBlockElement`. +- **`WordDocument.bodyChildContainsText(_:needle:)`** — static primitive, was `private static`. Single-child predicate; useful for custom traversal.
+- **`WordDocument.tableContainsText(_:needle:)`** — static primitive, was `private static`. Walks all cells + nested tables. + +External consumers previously had to reimplement this with diverging semantics — some skipped `.contentControl` recursion, some skipped table cell traversal (pre-#68 default), some used different `nthInstance` counting rules (top-level body child vs flattened paragraph). Now they get *exactly* the canonical behavior `insert_paragraph` etc. tools see. + +**Tests**: 236 passing (no regressions). 10 new public-API surface tests in `Issue86PublicAnchorLookupTests` pin the canonical behavior across releases (top-level paragraph match / `nthInstance` disambiguation / invalid input contracts / SDT recursion / table cell traversal / bookmark + raw skip / `bodyChildContainsText` predicate / `tableContainsText` nested-table walk / round-trip parity with `insertParagraph(at: .afterText)`). + +**Backward compatibility**: Pure additive (private → public visibility change). No API removals, no behavioral changes for existing callers. + +--- + +**v3.17.2 bump `ooxml-swift` dep v0.21.2 → v0.21.6** — patch release pulling in 4 ooxml-swift releases worth of hardening + new APIs (no MCP source changes, no MCP tool surface change). New typed errors fire only on edge cases (malicious input, dirty fallback edits); deprecation warnings only when callers actively use lossy/legacy APIs. Pure transitive bump: + +- **[ooxml-swift#7](https://github.com/PsychQuant/ooxml-swift/issues/7) v0.21.3 XML hardening** — DTD reject + 64KB attribute-value cap + SAX-based root-element attribute parsing + name whitelist on emit. New `XMLHardeningError.{dtdNotAllowed, attributeValueTooLarge, invalidAttributeName}` throws on malicious .docx input. 
+- **[ooxml-swift#6](https://github.com/PsychQuant/ooxml-swift/issues/6) v0.21.4 roundtrip loud-fail** — `AlternateContent.fallbackRunsModified` dirty flag throws `RoundtripError.unserializedFallbackEdit` when in-memory mutation isn't propagated back to rawXML. `Run.commentIds` `@available(*, deprecated)`; migrate to `commentRangeMarkers`. +- **[che-word-mcp#84 #85](https://github.com/PsychQuant/che-word-mcp/issues/84) v0.21.5 insertEquation flexibility** — new `Document.insertEquation(at: InsertLocation, ...)` overload + `flattenedDisplayText` extension covers OMML so anchor-text resolution traverses display-mode equations. +- **[ooxml-swift#5](https://github.com/PsychQuant/ooxml-swift/issues/5) v0.21.6 mutation surface fix** — `Hyperlink.text` setter `@available(*, deprecated)` (lossy: discards `RunProperties` + `rawElements`); migrate to `.runs` property. Position field cascade `Int = 0` → `Int? = nil` across 13 typed-child models with tri-state semantics. `xml:space="preserve"` autosense in `Run.toXMLThrowing` emit. +- **Plus 3 unreleased docs commits on ooxml-swift main** — [#17](https://github.com/PsychQuant/ooxml-swift/issues/17) bidirectional `needsPPr` ↔ `Issue4PPrRegressionGuardTests` reference; [#15](https://github.com/PsychQuant/ooxml-swift/issues/15) `parseRun` vs `parseParagraph` walker pattern divergence rationale; [#14](https://github.com/PsychQuant/ooxml-swift/issues/14) foreign-namespace pPr asymmetry documentation. + +**Tests**: 236 passing (no regressions). Suite breakdown: 0 failures, 9 pre-existing skips. **Backward compatibility**: Deprecation warnings only when callers use `Hyperlink.text` setter / `commentIds`; new typed errors only on edge cases (malicious input, dirty fallback edits) — code that previously silently lost data now throws fail-fast. No API removals (all v0.21.x deprecations queued for v0.22). + +**Verification**: 6-AI ensemble verified each ooxml-swift release upstream (5 Claude reviewers + Codex CLI gpt-5.5 xhigh). 
Three SDD bundles archived: `mutation-surface-fix`, `roundtrip-loud-fail`, `harden-xml-security`. + +--- + +**v3.17.1 bump `ooxml-swift` dep v0.21.0 → v0.21.2** — patch release pulling in upstream test/regression-guard hardening from the post-#56 follow-up bundle. **No public MCP tool change**, no behaviour change for valid input。Pure transitive hardening at the parser invariant layer: + +- **[ooxml-swift#4](https://github.com/PsychQuant/ooxml-swift/issues/4)** — `pPr` regression guard. 3-layer defense-in-depth on the v0.19.1 hot-fix invariant:`walkerPreConsumed: Set = ["pPr"]` 在 `parseParagraph` walker loop entry filter;既有 `case "pPr": break` 保留為 belt-and-suspenders;`#if DEBUG assert(name != "pPr", ...)` 在 catch-all default `unrecognizedChildren.append` site 作 dev-time canary。5 regression tests in `Issue4PPrRegressionGuardTests`。 +- **[ooxml-swift#13](https://github.com/PsychQuant/ooxml-swift/issues/13)** — empty `` self-closing test gap closed。TDD discovered emitter drops empty pPr blocks(`needsPPr` gate at `Paragraph.swift:335,466`);test name + assertion direction 反映 empirical OOXML-spec behaviour。 +- **[ooxml-swift#16](https://github.com/PsychQuant/ooxml-swift/issues/16)** — `countPPrOpenTags` helper regex hardening。Substring `]"#"` — 排除 ``(Track Changes paragraph-property revision element),先前會虛增計數造成 brittle 假失敗。 + +ooxml-swift suite 720 → 722(0 fail)。che-word-mcp suite 不變(pure transitive dep bump)。 + +**Verification**:6-reviewer cross-verification on #4(5 Claude teammates + Codex CLI gpt-5.5 xhigh, all PASS unanimous);5-reviewer on #13/#16 batch(Codex hung, skipped)。 + +--- + +**Pre-v3.17.x architectural milestones**(detail in [CHANGELOG](https://github.com/PsychQuant/che-word-mcp/blob/main/CHANGELOG.md)):sub-stacks A–E 完整覆蓋 paragraph + run scope(`if not typed, preserve as raw` principle)。Combined preservation gains since v3.14.0:`` 50% → 98.89%;`w14:` 5% → 93.98%;`document.xml` size loss 16.66% → 8.02%。Matrix-pin `testDocumentContentEqualityInvariant` 現在 
LOAD-BEARING across 5 preservation classes(rFonts / noProof / lang / kern / w14:)spanning run + paragraph + paragraph-mark scope — 任何一類 regression 都會 trip。 + +--- + +Office.js OOXML Roadmap **P0 100% 完成**(Umbrella issue [#43](https://github.com/PsychQuant/che-word-mcp/issues/43))。Latest milestones:v3.17.4 — bump ooxml-swift dep to v0.21.8(paired #91 insertEquation inline-mode error semantics + #92 flattenedDisplayText OMML wrapper coverage, both lib-only post-#85 verify follow-ups);v3.17.3 — bump ooxml-swift dep to v0.21.7(exposes public anchor lookup API for external Swift SPM consumers, [che-word-mcp#86](https://github.com/PsychQuant/che-word-mcp/issues/86));v3.17.2 — bump ooxml-swift dep to v0.21.6(4-version transitive bump pulling in XML hardening + roundtrip loud-fail + insertEquation InsertLocation overload + mutation surface fix from ooxml-swift#5/#6/#7);v3.17.1 — bump ooxml-swift dep to v0.21.2(pPr regression-guard hardening from ooxml-swift#4/#13/#16);v3.17.0 — `wrap_caption_seq` MCP tool([#62](https://github.com/PsychQuant/che-word-mcp/issues/62),rescues docs pasted from external sources so `insert_table_of_figures` / `insert_table_of_tables` produce populated TOFs);v3.16.0 — Bundle B anchor DX consistency([#70](https://github.com/PsychQuant/che-word-mcp/issues/70) [#71](https://github.com/PsychQuant/che-word-mcp/issues/71) [#72](https://github.com/PsychQuant/che-word-mcp/issues/72),BREAKING input-validation:conflicting anchors + `text_instance ≤ 0` 改 structured error)。**Architectural extension of 'if not typed, preserve as raw' principle** — 從 sub-stack A (#58 BodyChild) → B (#59 WhitespaceOverlay) → C (#60 RunProperties) → D (#65 ParagraphProperties.markRunProperties) → **E (#66 paragraph w14:* attrs)** 完整覆蓋 paragraph + run scope。 + +**前次 milestones**: + +- **v3.17.4** — bump `ooxml-swift` dep v0.21.7 → v0.21.8(paired #91 + #92 release)。#91 adds `InsertLocationError.inlineModeRequiresParagraphIndex` + `.invalidParagraphIndex(Int)` for `insert_equation` 
inline-mode anchor type / out-of-bounds `paragraph_index` (was silent fallthrough). #92 extends `flattenedDisplayText` OMML walk via shared `flattenRunsWithOMML(_:)` helper across hyperlink / fldSimple / AC.fallbackRuns wrappers (top-level runs + 3 wrapper paths now uniformly covered; contentControls path remains separate via `flattenContentControlText` per #63). MCP impact: `insert_equation` returns structured errors instead of silent no-op; anchor lookups against wrapper-nested OMML paragraphs now succeed where they previously silently 0-matched. Strict superset of pre-fix lookup behavior. Tests 236 passing. 5 follow-up issues #99-#103 filed for sibling bugs (DA-1..DA-5: direct-child OMML in `` / `` / `` + nested wrapper + docstring qualifier). +- **v3.17.3** — bump `ooxml-swift` dep v0.21.6 → v0.21.7(exposes public anchor lookup API: `WordDocument.findBodyChildContainingText` instance method + `bodyChildContainsText` / `tableContainsText` static primitives, all `private → public`)。Pure transitive, no MCP source changes. External Swift SPM consumers can now call canonical anchor-lookup logic directly instead of reimplementing with diverging semantics. 10 new public-API surface tests. Backward compatible (additive visibility change). +- **v3.17.2** — bump `ooxml-swift` dep v0.21.2 → v0.21.6 (4-version transitive bump: v0.21.3 XML hardening + v0.21.4 roundtrip loud-fail + v0.21.5 insertEquation InsertLocation overload + v0.21.6 mutation surface fix + 3 unreleased docs commits on ooxml-swift main)。No MCP tool surface change. Deprecation warnings for `Hyperlink.text` setter / `commentIds` (queued for v0.22 removal). Tests 236 passing. 
+- **v3.17.1** — bump `ooxml-swift` dep v0.21.0 → v0.21.2 (pPr regression-guard + test infra hardening from upstream #4/#13/#16)。No public MCP tool change。 +- **v3.17.0** — `wrap_caption_seq` MCP tool(Refs [#62](https://github.com/PsychQuant/che-word-mcp/issues/62),Phase 2):bulk-wraps plain-text caption number portions in SEQ field runs across body paragraphs whose flattened text matches a regex (EXACTLY ONE numeric capture group)。Captured digit becomes SEQ field cachedResult so Word's first-open render preserves user-typed numbering。Idempotent + scope:body-only + bookmark wrap opt-in。Suite 231 → 236 (+5)。Tools 234 → 235。 +- **v3.16.2** — pure dep bump `ooxml-swift` v0.20.5 → v0.21.0 (Refs [#62](https://github.com/PsychQuant/che-word-mcp/issues/62) [#68](https://github.com/PsychQuant/che-word-mcp/issues/68)):picks up `InsertLocation.findBodyChildContainingText` recursing into table cells + block-level SDT (#68) + `WordDocument.wrapCaptionSequenceFields` lib API ready for v3.17.0 MCP wrapper (#62)。 +- **v3.16.1** — anchor-presence whitelist drift prevention(Refs [#80](https://github.com/PsychQuant/che-word-mcp/issues/80)):pure refactor,static `toolAnchorWhitelists` dict 替換 4 個分散的 literal anchor arrays。4 invariant tests(4 → 8)防將來新增 anchor 漏接 conflict-detection。No runtime behavior change。 +- **v3.16.0** — Bundle B anchor DX consistency(Refs [#70](https://github.com/PsychQuant/che-word-mcp/issues/70) [#71](https://github.com/PsychQuant/che-word-mcp/issues/71) [#72](https://github.com/PsychQuant/che-word-mcp/issues/72),**BREAKING input validation only**):(a) conflicting anchors(如 `after_text` + `index`)silent-priority 改成 structured error;(b) explicit `text_instance ≤ 0` 拒絕;(c) 4 個 #61-target tools 統一錯誤格式 `Error: : ` 給 AI-caller 錯誤歸因。Suite 201 → 227 (+26)。 +- **v3.15.3** — Bundle A2 polish(Refs [#76](https://github.com/PsychQuant/che-word-mcp/issues/76) [#77](https://github.com/PsychQuant/che-word-mcp/issues/77) [#78](https://github.com/PsychQuant/che-word-mcp/issues/78) 
[#79](https://github.com/PsychQuant/che-word-mcp/issues/79)):schema description doc rot 修正 + bookmarkMarker / SDT / TOC bookmark / contentControl 的 append-index regression pin。Suite 196 → 201。 +- **v3.15.2** — Bundle A polish(Refs [#69](https://github.com/PsychQuant/che-word-mcp/issues/69) [#73](https://github.com/PsychQuant/che-word-mcp/issues/73) [#74](https://github.com/PsychQuant/che-word-mcp/issues/74) [#75](https://github.com/PsychQuant/che-word-mcp/issues/75)):`insert_paragraph` append message 改回 body.children index(pre-fix `getParagraphs().count - 1` 在含 tables/SDTs 文件 mis-report)+ debug log label fixes + equation F5 partial-dict regression pin。Suite 194 → 196。 +- **v3.15.1** — Verify findings F1+F2+F3+F5 closed(Refs [#61](https://github.com/PsychQuant/che-word-mcp/issues/61)):`after_image_id` 加到 insert_paragraph + insert_equation (display only) + insert_image_from_path;`into_table_cell` 加到 insert_equation (display only);equation 成功訊息加 anchor info;malformed `into_table_cell` partial dict 改 structured error。Suite 185 → 194。 +- **v3.15.0** — `insert_paragraph` / `insert_equation` accept anchor parameters(Refs [#61](https://github.com/PsychQuant/che-word-mcp/issues/61)):closes MCP-side wire-up gap,handler dispatch 之前 silently 丟棄 `after_text` / `before_text` / `text_instance` / `into_table_cell`。Anchor priority `into_table_cell > after_text > before_text > index > append`。Inline equation explicitly 拒絕 anchor。Suite 176 → 185。 +- **v3.14.5** — `findBodyChildContainingText` 涵蓋所有 editable surfaces(Refs [#63](https://github.com/PsychQuant/che-word-mcp/issues/63) verify F1):bumps `ooxml-swift` v0.20.4 → v0.20.5。Closes v3.14.4 CHANGELOG over-claim — REPLACE path 已修但 LOOKUP path(`InsertLocation.afterText`)仍只看 `para.runs`。`Paragraph.flattenedDisplayText` 擴充為涵蓋 runs + hyperlinks + fieldSimples + alternateContents + contentControls (recursive into nested SDT)。Suite 174 → 176。 +- **v3.14.4** — `replace_text` on inline `` content controls fix(Refs 
[#63](https://github.com/PsychQuant/che-word-mcp/issues/63)):`Document.replaceInParagraphSurfaces` 之前覆蓋 runs / hyperlinks / fieldSimples / alternateContents 但 **沒有** contentControls — 包在 inline `<w:sdt>` 裡的文字 silently 0-match。常見於 pandoc / Quarto / LaTeX→docx cross-ref placeholder。bumps `ooxml-swift` v0.20.3 → v0.20.4。Suite 172 → 174。 +- **v3.14.3** sub-stack E (#66) — paragraph `w14:paraId` / `w14:textId` round-trip;`w14:` retention 5% → 93.98%;document.xml size loss 16.66% → 8.02%。 +- **v3.14.2** sub-stack D (#65) — `ParagraphProperties.markRunProperties` round-trips `<w:rPr>` direct child of `<w:pPr>`(pilcrow ¶ 字符外觀 per ECMA-376 §17.3.1.27)。`` retention 50% → 98.89%。 +- **v3.14.1** sub-stack C-CONT — closes triple-confirmed P0 (R2 + R5 + Codex 6-AI verify):trim `recognizedRprChildren` Set 到 actually-extracted kinds(修了 `` / `` / `` / `` / `` 等的 silent drop)。Round-trip size loss 17.75% → 16.66%。 +- **v3.14.0** sub-stack C of [#60](https://github.com/PsychQuant/che-word-mcp/issues/60) — `RunProperties` 新增 4-axis rFonts / noProof / kern / 3-axis lang typed fields + rawChildren passthrough(w14:textOutline / textFill / glow 等)。Pre-fix `eastAsia="DFKai-SB"`(繁體中文)會 silently 被 `ascii` 值替換掉;v3.14.0 完整保留 4 個 axis。 +- **v3.13.13** CRITICAL HOTFIX (sub-stack B-CONT-2-CONT);v3.13.12 (DO NOT USE — 刪除 `` 內容);v3.13.11 sub-stack B-CONT;v3.13.10 sub-stack B 初版 (#59 WhitespaceOverlay);v3.13.9 A-CONT-3 silent correctness regression;v3.13.6-v3.13.8 sub-stack A cycles (#58 BodyChild);v3.13.5 R5 stack-completion — 詳見 [CHANGELOG](https://github.com/PsychQuant/che-word-mcp/blob/main/CHANGELOG.md)。 + +## 兩種操作模式 + +### Direct Mode(`source_path`)— 唯讀,免開啟 + +傳入檔案路徑直接使用,不需要先 `open_document`。適合快速檢視。 + +``` +list_images: { "source_path": "/path/to/file.docx" } +search_text: { "source_path": "/path/to/file.docx", "query": "keyword" } +get_paragraphs: { "source_path": "/path/to/file.docx" } +``` + +### Session Mode(`doc_id`)— 完整讀寫生命週期 + +先 `open_document` 取得 `doc_id`,再進行編輯操作。 + +``` +open_document: { "path": 
"/path/to/file.docx", "doc_id": "mydoc" } +insert_paragraph: { "doc_id": "mydoc", "text": "Hello World" } +save_document: { "doc_id": "mydoc", "path": "/path/to/output.docx" } +close_document: { "doc_id": "mydoc" } +``` + +v3.0.0+ session state 追蹤:dirty tracking、autosave、`finalize_document`、disk drift 偵測。 + +## Round-trip Fidelity(v3.5.0 true byte-preservation) + +底層 `ooxml-swift v0.19.2` 採用 **preserve-by-default + dirty tracking** 架構:`open_document` 保留原始 archive tempDir;`save_document` overlay 模式透過 `WordDocument.modifiedParts: Set` 精確判斷哪些 part 真正被改動,**未改動的 typed-managed part 完全不重寫**——byte-for-byte 保留 `word/theme/`、`webSettings.xml`、`people.xml`、`commentsExtended/Extensible/Ids`、`glossary/`、`customXml/`、**以及 `word/document.xml`、`styles.xml`、`fontTable.xml`、`header*.xml`、`footer*.xml`、`comments.xml`、`footnotes.xml`、`endnotes.xml`** 等所有 typed parts。v0.19.x 額外解決 #56 P0:`` root 34 個 `xmlns:*` declarations 完整保留,`` / `` / `` / `` 結構化 wrapper 全程 round-trip(pre-v0.19.0 會 silently 丟掉 wrapper 內 354 個 `` text nodes)。 + +NTPU 學位論文模板的中文字體(DFKai-SB / 華康中楷體)no-op `save_document` 後完整保留 13 fontTable + 6 distinct headers + 4 footers + three-segment PAGE field + `` identity。 + +## Track Changes 寫側合約(v3.12.0+) + +兩條路徑:**accept/reject 既有修訂** OR **程式化生成新修訂**。 + +### 程式化生成(v3.12.0 新增) + +``` +1. enable_track_changes(doc_id, author: "Reviewer A") +2. insert_text_as_revision(doc_id, paragraph_index, position, text) + delete_text_as_revision(doc_id, paragraph_index, start, end) + move_text_as_revision(doc_id, source/dest) + format_text(doc_id, paragraph_index, bold: true, as_revision: true) + set_paragraph_format(doc_id, paragraph_index, alignment: "center", as_revision: true) +3. save_document(...) 
+``` + +**Author resolution 三層 fallback**:explicit `author` arg → `revisions.settings.author`(在 `enable_track_changes` 時設定)→ `"Unknown"`。 + +**Side-effect 合約**(重要):`as_revision: true` 要求 track changes 已開啟。Disabled 時呼叫**會拋 `track_changes_not_enabled`**,不會偷偷 auto-enable。設計理由:避免 hidden state mutation。 + +### Accept / Reject 既有修訂 + +``` +get_revisions / accept_revision / reject_revision / accept_all_revisions / reject_all_revisions +``` + +## Direct Mode 支援的工具 + +| 類別 | 工具 | +|------|------| +| 讀取內容 | `get_text`, `get_document_text`, `get_paragraphs`, `get_document_info`, `search_text` | +| 列出元素 | `list_images`, `list_styles`, `get_tables`, `list_comments`, `list_hyperlinks`, `list_bookmarks`, `list_footnotes`, `list_endnotes`, `get_revisions`, `list_content_controls` | +| 屬性 | `get_document_properties`, `get_section_properties`, `get_word_count_by_section` | +| 匯出 | `export_markdown` | + +## 工具總覽 + +### 文件管理 + Session 生命週期(v3.0.0+) + +- `create_document`, `open_document`, `save_document`, `close_document`, `finalize_document` +- `list_open_documents`, `get_document_info` ⚡, `get_document_session_state`, `get_session_state` +- `check_disk_drift`, `revert_to_disk`, `reload_from_disk`, `recover_from_autosave`, `checkpoint` + +### 內容操作 + +- `get_text` ⚡, `get_document_text` ⚡, `get_paragraphs` ⚡, `search_text` ⚡, `search_text_batch` +- `insert_paragraph`, `update_paragraph`, `delete_paragraph` +- `replace_text`, `replace_text_batch`, `insert_text` + +### 格式設定 + +- `format_text`(**v3.12.0+ `as_revision: bool`**), `set_paragraph_format`(**v3.12.0+ `as_revision: bool`**), `apply_style` +- `set_paragraph_border`, `set_paragraph_shading`, `set_character_spacing`, `set_text_effect` +- `get_paragraph_runs`, `get_text_with_formatting`, `search_by_formatting`, `search_text_with_formatting`, `list_all_formatted_text` + +### 樣式管理(v3.10.0+ 強化,#48) + +- `list_styles` ⚡, `apply_style`, `create_style`, `update_style`, `delete_style` +- v3.10.0 新增:`get_style_inheritance_chain`(含 cycle 
detection), `link_styles`(`` 段落+字元 pair), `set_latent_styles`, `add_style_name_alias`(BCP 47 多語) +- `create_style` / `update_style` 新增 6 個 args:`based_on`, `linked_style_id`, `next_style_id`, `q_format`, `hidden`, `semi_hidden` + +### Numbering / 編號清單(v3.10.0+ 完整補完,#46) + +- 入門:`insert_bullet_list`, `insert_numbered_list`, `set_list_level`, `set_outline_level` +- 定義管理(v3.10.0 新增 8 個):`list_numbering_definitions`, `get_numbering_definition`, `create_numbering_definition`(max 9 levels), `override_numbering_level`, `assign_numbering_to_paragraph`, `continue_list`, `start_new_list`, `gc_orphan_numbering` + +### 區段 / 頁面設定(v3.10.0+ 強化,#47) + +- 基礎:`set_page_size`, `set_page_margins`, `set_page_orientation`, `set_page_borders`, `set_columns`, `set_line_numbers`, `set_text_direction` +- 區段斷點:`insert_page_break`, `insert_section_break`, `insert_continuous_section_break`, `insert_column_break` +- v3.10.0 新增 7 個:`set_line_numbers_for_section`(legal docs ``), `set_section_vertical_alignment`(封面置中), `set_page_number_format`(羅馬數字等), `set_section_break_type`, `set_title_page_distinct`, `set_section_header_footer_references`, `get_all_sections` + +### 表格(v3.11.0+ 強化,#49) + +- 基礎:`insert_table`, `get_tables` ⚡, `update_cell`, `delete_table` +- 結構:`merge_cells`, `set_table_style`, `set_table_alignment` +- 行列:`add_row_to_table`, `add_column_to_table`, `delete_row_from_table`, `delete_column_from_table` +- 尺寸:`set_cell_width`, `set_row_height`, `set_cell_vertical_alignment`, `set_header_row` +- v3.11.0 新增 5 個:`set_table_conditional_style`(10 種 region:firstRow / lastRow / bandedRows…),`insert_nested_table`(最深 5 層,超過拋 `nested_too_deep`),`set_table_layout`(fixed/autofit),`set_table_indent`(``) + +### 超連結(v3.11.0+ 三種 typed,#50) + +- 基礎:`insert_hyperlink`, `update_hyperlink`, `delete_hyperlink`, `list_hyperlinks` ⚡ +- 內部連結:`insert_internal_link`, `insert_cross_reference` +- v3.11.0 新增 3 個 typed:`insert_url_hyperlink`(外部 URL + tooltip + history flag),`insert_bookmark_hyperlink`(`w:anchor`,無 
rId),`insert_email_hyperlink`(`mailto:` + URL-encoded subject)— 三者自動建 Hyperlink character style + +### 頁首頁尾 + 浮水印(v3.3.0+ → v3.11.0+ 強化,#51) + +- 寫入:`add_header`, `update_header`, `add_footer`, `update_footer`, `insert_page_number` +- 列舉與讀取:`list_headers`, `get_header`, `list_footers`, `get_footer` +- 刪除:`delete_header`, `delete_footer` +- v3.11.0 新增 4 個:`enable_even_odd_headers`(``),`link_section_header_to_previous` / `unlink_section_header_from_previous`(Word-compat clone),`get_section_header_map` +- 浮水印:`insert_watermark`, `insert_image_watermark`, `remove_watermark`, `list_watermarks`, `get_watermark` + +### Content Controls / SDT(v3.9.0+ 完整 read/write,#44) + +- 寫入:`insert_content_control`(12 type discrimination:richText / plainText / picture / date / dropDownList / comboBox / checkBox / bibliography / citation / group / repeatingSection / repeatingSectionItem) +- 讀取:`list_content_controls` ⚡(flat 或 nested tree mode),`get_content_control`(by id / tag / alias,回 metadata + `` XML) +- 修改:`update_content_control_text`(preserves `` byte-identical),`replace_content_control_content`(whitelist validation,拒絕含 `` / `` / `` 的 input) +- 刪除:`delete_content_control`(`keep_content: true` 預設 unwrap children) +- Repeating sections:`insert_repeating_section`, `list_repeating_section_items`, `update_repeating_section_item` +- 表單便利:`insert_checkbox`, `insert_dropdown`, `insert_text_field` +- v0.15.0 SDT id allocator 改 max+1 deterministic(取代 random) + +### 主題編輯(v3.3.0+,#28) + +- `get_theme` — 讀 major/minor 字體 + 色盤 +- `update_theme_fonts` — 部分更新字體 slot(latin/ea/cs)。**用於 NTPU 論文中文字體修復**:`update_theme_fonts({ minor: { ea: "DFKai-SB" }})` +- `update_theme_color` — slot-named hex color 更新(accent1-6 / hyperlink / followedHyperlink) +- `set_theme` — 完整 theme XML 覆寫 escape hatch + +### 圖片 + +- `insert_image`, `insert_image_from_path`, `insert_floating_image` +- `update_image`, `delete_image`, `list_images` ⚡, `set_image_style` +- `export_image`, `export_all_images`, `insert_drop_cap` + 
+### 數學公式(v3.2.0+ 完整 LaTeX 子集) + +- `insert_equation` — 透過 [`latex-math-swift`](https://github.com/PsychQuant/latex-math-swift):`\frac`, `\sqrt`, `\hat`/`\bar`/`\tilde`, `\left/\right`, `\sum`/`\int`/`\prod`(with bounds),`\ln`/`\sin`/`\cos`/`\tan`/`\log`/`\exp`/`\max`/`\min`/`\det`,`\sup`/`\inf`/`\lim`,`\text{}`,全部希臘字母(含 `\varepsilon` 變體)+ 常用運算子 +- `list_equations`, `get_equation`, `update_equation`, `delete_equation` +- `splice_omath_from_source`, `splice_paragraph_omath_from_source` — 跨 doc verbatim copy `` XML(v3.20.0+,#160;wraps ooxml-swift v0.24.0 `spliceOMath` API) + +### 匯出 + +- `export_text`, `export_markdown` ⚡, `export_revision_summary_markdown`, `export_comment_threads_markdown` + +### 註解(v3.4.0+ thread 管理) + +- 寫入:`insert_comment`, `update_comment`, `delete_comment`, `reply_to_comment`, `resolve_comment` +- 讀取:`list_comments` ⚡ +- Thread 管理:`list_comment_threads`, `get_comment_thread`, `sync_extended_comments` + +### People / Comment Authors(v3.4.0+,v3.5.0 dual identity #34) + +- `list_people` — 解析完整 ``,回 dual identity: + - `person_id` (GUID, 來自 `userId="S::email::guid"` 第三段),rename 跨版本穩定 + - `display_name_id` (= author,v3.4.0 legacy id) + - `display_name`, `email`, `color`, `provider_id` +- `add_person`, `update_person`, `delete_person` — 接受 GUID **或** legacy author 任一形式 + +### Track Changes / Revisions(v3.12.0 寫側 #45) + +- 既有修訂處理:`enable_track_changes`, `disable_track_changes`, `get_revisions` ⚡, `accept_revision`, `reject_revision`, `accept_all_revisions`, `reject_all_revisions` +- **v3.12.0 程式化生成(NEW)**: + - `insert_text_as_revision(doc_id, paragraph_index, position, text, author?, date?)` — `` 包覆,跨 run split 處理 + - `delete_text_as_revision(doc_id, paragraph_index, start, end, author?, date?)` — `` 標記 + `` → `` substitution(單段內,跨段 OOS) + - `move_text_as_revision(doc_id, from_paragraph_index, from_start, from_end, to_paragraph_index, to_position, author?, date?)` — paired `` / `` with adjacent revision ids + - `format_text` 加 `as_revision: bool` → `` 
+ - `set_paragraph_format` 加 `as_revision: bool` → `` +- 比對:`compare_documents`, `compare_documents_markdown` + +### 註腳與尾注(v3.4.0+ 補完 update) + +- 寫入:`insert_footnote`, `delete_footnote`, `insert_endnote`, `delete_endnote` +- 讀取:`list_footnotes` ⚡, `list_endnotes` ⚡ +- v3.4.0+:`get_footnote`, `update_footnote`, `get_endnote`, `update_endnote`(in-place 替換、保留 ID) + +### 標號與目錄 + +- `insert_caption`, `list_captions`, `get_caption`, `update_caption`, `delete_caption`(v3.1.0+) +- `insert_table_of_figures`, `insert_index`, `insert_index_entry`, `insert_toc` +- `update_all_fields`(v3.1.0+,#19)— F9 等價,全文 SEQ 重算 +- `insert_cross_reference` + +### Custom XML / Web Settings + +- `list_custom_xml_parts`(v3.9.0+ stub,real impl 待 Change B) +- `get_web_settings`, `update_web_settings`(v3.4.0+,#31)— `relyOnVML` / `optimizeForBrowser` / `allowPNG` / `doNotSaveAsSingleFile` + +### 屬性與保護 + +- `get_document_properties` ⚡, `set_document_properties` +- `get_section_properties` ⚡, `get_word_count_by_section` ⚡ +- `protect_document`, `unprotect_document`, `set_document_password`, `remove_document_password` +- `restrict_editing_region` + +### 欄位代碼 + +- `insert_date_field`, `insert_page_field`, `insert_sequence_field`, `insert_merge_field` +- `insert_calculation_field`, `insert_if_field`, `insert_text_field` + +### 進階格式 / 排版 + +- 分欄、tab stops、drop cap、horizontal line、symbol +- `insert_horizontal_line`, `insert_drop_cap`, `insert_symbol`, `insert_column_break` +- `insert_tab_stop`, `clear_tab_stops` +- `set_keep_lines`, `set_keep_with_next`, `set_widow_orphan`, `set_outline_level`, `set_page_break_before` +- 字體和語言:`set_language` + +⚡ = 支援 Direct Mode + +## 技術細節 + +- **語言**: Swift(macOS 13.0+) +- **MCP SDK**: swift-sdk 0.12+ +- **OOXML 引擎**: [`ooxml-swift v0.19.2`](https://github.com/PsychQuant/ooxml-swift)(preserve-by-default + dirty tracking + revision generation + `document.xml` lossless round-trip) +- **LaTeX parser**: [`latex-math-swift 
v0.1.0+`](https://github.com/PsychQuant/latex-math-swift)(v3.2.0+) +- **Markdown export**: [`word-to-md-swift`](https://github.com/PsychQuant/word-to-md-swift) + [`markdown-swift`](https://github.com/PsychQuant/markdown-swift) + +## 版本 + +- **Plugin shell**: v3.13.2 +- **Binary**: v3.13.2(`CheWordMCP`) +- **GitHub**: https://github.com/PsychQuant/che-word-mcp +- **完整 CHANGELOG**: https://github.com/PsychQuant/che-word-mcp/blob/main/CHANGELOG.md + +### Plugin Shell vs Binary 版本 + +兩者獨立但本次 v3.13.2 同步。Plugin shell(marketplace 端,含 SKILL.md / CLAUDE.md / `.mcp.json` / wrapper)有自己的 minor,反映文件 / skill 變動;Binary(GitHub release 端)有自己的 minor,反映 MCP server 內部新增 tool 或修 bug。Wrapper auto-download 從 release fetch binary 到 `~/bin/CheWordMCP`。 + +### 重要 milestones + +- **v3.13.2** — bump ooxml-swift v0.19.1→v0.19.2,修正 v3.13.1 的 6-AI verification 找到的 4 個 blocking findings(無 source 變更):F1 `Hyperlink.toXML()` 真正 emit Reader 解出的 runs/rawAttributes/rawChildren(pre-fix 把所有 inner runs 攤平成單一 hardcoded 藍底線 styled run)、F2 `addBookmark`/`deleteBookmark` 同步 `bookmarkMarkers`(pre-fix source-loaded paragraph 的新 bookmark 在 save 時被靜默丟掉、刪除留下 zombie `name=""` markers)、F3 ``/``/``/`` Reader 為 inner runs 設 `position`+`revisionId` + Writer sort path 重新 group runs by revisionId 包回 wrapper(pre-fix wrapper 在 source-load round-trip 後完全消失)、F4 namespace 保留從 `document.xml` 擴展到 header/footer/footnote/endnote 各自的 root(pre-fix NTPU thesis VML watermark headers declaring mc/wp/w14/w15 silently 退化到 hardcoded 5-namespace template) +- **v3.13.1** — `pPr` double-emission silent regression hot-fix(bump ooxml-swift v0.19.0→v0.19.1,無 source 變更)。NTPU thesis 驗證時抓到:v3.13.0 round-trip 後 `` 被雙重 emit,unrecognized children 從 799→1333(+67%)。1-line `case "pPr": break` 修正 +- **v3.13.0** — `document.xml` lossless round-trip + tool-mediated wrapper edits(closes [#56](https://github.com/PsychQuant/che-word-mcp/issues/56) P0)— 完整保留 `` root 34 個 `xmlns:*` declarations、`` / `` / `` / `` 全程 round-trip(pre-v3.13.0 silently 丟掉 32 
namespaces / 100% bookmarks / 354 個 `` text nodes 在 wrapper 內),`replace_text` 走 wrapper-internal runs(無 silent failure)。Built on ooxml-swift v0.19.x +- **v3.12.0** — Programmatic Track Changes 生成 — 3 新 MCP 工具(insert_text_as_revision / delete_text_as_revision / move_text_as_revision)+ 2 擴充 args(format_text / set_paragraph_format 加 `as_revision: bool`)— closes [#45](https://github.com/PsychQuant/che-word-mcp/issues/45),**Office.js OOXML Roadmap P0 100% 完成** +- **v3.11.0** — Tables / Hyperlinks / Headers extensions(16 新工具:5 table conditional/nested/layout + 3 typed hyperlinks + 4 header even/odd/link/section-map)— closes [#49](https://github.com/PsychQuant/che-word-mcp/issues/49) [#50](https://github.com/PsychQuant/che-word-mcp/issues/50) [#51](https://github.com/PsychQuant/che-word-mcp/issues/51) +- **v3.10.0** — Styles + Numbering + Sections foundation(19 新工具 + 6 擴充 args:4 style inheritance/link/latent/alias + 8 numbering definition lifecycle + 7 section vertical/break/title-page)— closes [#46](https://github.com/PsychQuant/che-word-mcp/issues/46) [#47](https://github.com/PsychQuant/che-word-mcp/issues/47) [#48](https://github.com/PsychQuant/che-word-mcp/issues/48) +- **v3.9.0** — Content Controls (SDT) 完整 read/write(7 新工具 + 12-type discrimination + nested SDT tree)— closes [#44](https://github.com/PsychQuant/che-word-mcp/issues/44) +- **v3.8.0** — Header/footer raw-element preservation + counter-isolation flag — closes [#52](https://github.com/PsychQuant/che-word-mcp/issues/52) +- **v3.7.0–v3.7.2** — Save durability cycle: path-traversal 修正 / hdr-ftr auto-suffix / `updateAllFields`(#53–#55, #54) +- **v3.6.0** — Save durability + autosave Design B + serial-only OOXML IO — closes #40 #41 +- **v3.5.0** — true byte-preservation via dirty tracking — closes #23 round-2 + #32 #33 #34 +- **v3.4.0** — Phase 2B+2C: 13 個 comment-thread/people/notes-update/web-settings 工具 — closes #24 #25 #29 #30 #31 +- **v3.3.0** — Phase 2A: 12 個 theme/header/footer/watermark 工具 — closes #26 
#27 #28 +- **v3.2.0** — `insert_equation` LaTeX parser delegated to `latex-math-swift` — closes #22 +- **v3.1.0** — 9 個 readback tools(Caption CRUD, update_all_fields, Equation CRUD)— closes #17 #19 #21 +- **v3.0.0** — Session state API(dirty tracking, autosave, finalize_document, disk drift detection)— closes #12 #13 #15 + +### 底層架構里程碑 + +- **`ooxml-swift v0.19.0–0.19.2`** — `document.xml` lossless round-trip:root namespace preservation(34 `xmlns:*` + `mc:Ignorable`)+ Bookmark Reader parsing + Wrapper hybrid model(`Hyperlink` / `FieldSimple` / `AlternateContent` typed editable surface + raw passthrough)+ sort-by-position Writer emit + 6 raw-carrier types for `` schema completeness。v0.19.1 修正 v0.19.0 的 pPr double-emission silent regression。v0.19.2 修正 6-AI verification 找到的 4 個 blocking findings:Hyperlink writer 真正 iterate runs+rawAttributes+rawChildren、bookmark mutation API 同步 markers、ins/del/moveFrom/moveTo wrapper round-trip(Reader 設 position+revisionId、Writer sort path 重新 group by revisionId)、namespace 保留擴展到 header/footer/footnote/endnote(新 `ContainerRootTag` helper) +- **`ooxml-swift v0.18.0`** — Revision generation primitives(6 new WordDocument methods + writer-side `Paragraph.toXML()` revision wrapping + `` → `` substitution) +- **`ooxml-swift v0.16.0–0.17.0`** — Styles inheritance / Numbering lifecycle / Section vertical alignment / Table conditional+nested+indent / Typed hyperlinks / Even-odd headers +- **`ooxml-swift v0.15.0–0.15.1`** — `SDTParser` first-class `` model + `BodyChild.contentControl` + max+1 SDT id allocator +- **`ooxml-swift v0.13.0–0.14.0`** — true byte-preservation via dirty tracking + raw-element preservation +- **`ooxml-swift v0.12.0`** — preserve-by-default 架構(PreservedArchive + RelationshipIdAllocator + ContentTypesOverlay) +- **`ooxml-swift v0.10.0–0.11.0`** — `FieldParser` + `OMMLParser` readback primitives + `MathAccent` + +## Office.js OOXML Roadmap P0 Closure Map + +| § | Sub-issue | che-word-mcp 版本 | 
+|---|-----------|-------------------|
+| §1 Content Controls (SDT) | [#44](https://github.com/PsychQuant/che-word-mcp/issues/44) | v3.9.0 |
+| §2 Track Changes 寫側 | [#45](https://github.com/PsychQuant/che-word-mcp/issues/45) | v3.12.0 |
+| §3 Numbering | [#46](https://github.com/PsychQuant/che-word-mcp/issues/46) | v3.10.0 |
+| §4 Sections | [#47](https://github.com/PsychQuant/che-word-mcp/issues/47) | v3.10.0 |
+| §8 Styles | [#48](https://github.com/PsychQuant/che-word-mcp/issues/48) | v3.10.0 |
+| §9 Tables | [#49](https://github.com/PsychQuant/che-word-mcp/issues/49) | v3.11.0 |
+| §14 Hyperlinks | [#50](https://github.com/PsychQuant/che-word-mcp/issues/50) | v3.11.0 |
+| §16 Headers / Footers | [#51](https://github.com/PsychQuant/che-word-mcp/issues/51) | v3.11.0 |
+
+Umbrella [#43](https://github.com/PsychQuant/che-word-mcp/issues/43) — closed 2026-04-25。
diff --git a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh
new file mode 100755
index 0000000..2d08728
--- /dev/null
+++ b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# Version-aware auto-download wrapper for CheWordMCP.
+#
+# Usage: che-word-mcp-wrapper.sh [args...]   (all args are forwarded to the binary)
+#
+# Design:
+#   - Reads desired version from plugin.json (plugin's intended binary version)
+#   - Compares against ~/bin/.CheWordMCP.version sidecar
+#   - Re-downloads when plugin has been updated but binary is stale
+#   - Atomic file swap (.tmp + mv) so partial downloads never break things
+#   - Falls back to releases/latest if plugin.json unreadable or pinned tag missing
+#   - Diagnostics go to stderr only; stdout is left for the exec'd binary
+#
+# Exit status: 1 when no executable binary exists and none could be downloaded;
+# otherwise the wrapper process is replaced by the binary via exec.
+#
+# Fixed in 2.0.1:
+#   - No version check → now checks plugin.json version against installed sidecar
+#   - REPO was obsolete fork kiki830621 → now PsychQuant
+
+set -u
+
+REPO="PsychQuant/che-word-mcp"
+BINARY_NAME="CheWordMCP"
+INSTALL_DIR="$HOME/bin"
+BINARY="$INSTALL_DIR/$BINARY_NAME"
+VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version"
+
+# Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT
+# which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh.
+PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json"
+
+# Read desired version from plugin.json (empty string on any failure → fallback to "latest").
+DESIRED_VERSION=""
+if [[ -f "$PLUGIN_JSON" ]]; then
+    DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \
+        | head -1 | cut -d'"' -f4 || true)
+fi
+
+# Read currently installed version from sidecar (empty string if file missing/unreadable).
+INSTALLED_VERSION=""
+[[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true)
+
+# Decide whether to download.
+NEED_DOWNLOAD=false
+REASON=""
+if [[ ! -x "$BINARY" ]]; then
+    NEED_DOWNLOAD=true
+    REASON="binary not installed"
+elif [[ -n "$DESIRED_VERSION" ]] && [[ "$INSTALLED_VERSION" != "$DESIRED_VERSION" ]]; then
+    NEED_DOWNLOAD=true
+    REASON="plugin wants v${DESIRED_VERSION}, installed is v${INSTALLED_VERSION:-unknown}"
+fi
+
+if $NEED_DOWNLOAD; then
+    echo "$BINARY_NAME: $REASON — downloading from $REPO..." >&2
+    mkdir -p "$INSTALL_DIR"
+
+    # Try pinned tag first, then fall back to latest release.
+    # The pinned entry expands to "" when DESIRED_VERSION is empty and is skipped below.
+    URL=""
+    for API_URL in \
+        "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \
+        "https://api.github.com/repos/$REPO/releases/latest"
+    do
+        [[ -z "$API_URL" ]] && continue
+        # Keep the first browser_download_url whose path ends in /$BINARY_NAME.
+        URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \
+            | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \
+            | sed 's/.*"\(https[^"]*\)".*/\1/')
+        [[ -n "$URL" ]] && break
+    done
+
+    if [[ -z "$URL" ]]; then
+        if [[ -x "$BINARY" ]]; then
+            echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2
+        else
+            echo "$BINARY_NAME: ERROR — no download URL found at $REPO. Install manually: https://github.com/$REPO/releases" >&2
+            exit 1
+        fi
+    else
+        # -f (--fail) makes curl exit non-zero on HTTP errors. Without it an error
+        # page would be saved, marked executable and swapped in over a working binary.
+        # The -s test rejects an empty download; chmod/mv are part of the condition
+        # so the version sidecar is only stamped after the swap really happened.
+        if curl -fsL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null \
+            && [[ -s "${BINARY}.tmp" ]] \
+            && chmod +x "${BINARY}.tmp" \
+            && mv "${BINARY}.tmp" "$BINARY"; then
+            # Record the version the plugin asked for (even if the asset came from the
+            # releases/latest fallback) so the next launch does not download again.
+            echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE"
+            echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2
+        else
+            rm -f "${BINARY}.tmp" 2>/dev/null
+            if [[ -x "$BINARY" ]]; then
+                echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2
+            else
+                echo "$BINARY_NAME: ERROR — download failed" >&2
+                exit 1
+            fi
+        fi
+    fi
+fi
+
+exec "$BINARY" "$@"
diff --git a/plugins/che-word-mcp/skills/che-word-mcp/SKILL.md b/plugins/che-word-mcp/skills/che-word-mcp/SKILL.md
new file mode 100644
index 0000000..847af23
--- /dev/null
+++ b/plugins/che-word-mcp/skills/che-word-mcp/SKILL.md
@@ -0,0 +1,261 @@
+---
+name: che-word-mcp
+description: Use when working with Microsoft Word (.docx) documents — reading content, creating new documents, modifying text/formatting/structure, working with tables/images/comments/track-changes/SDT/sections/styles/headers/hyperlinks. Swift-native OOXML server, 218+ tools, no Word install required.
+---
+
+# che-word-mcp
+
+A Swift-native MCP server for Microsoft Word (.docx) document manipulation.
**218+ tools** for reading, writing, and modifying Word documents without requiring Microsoft Word installation. Built on `ooxml-swift` v0.18.0. + +Office.js OOXML Roadmap P0 = **100% complete** ([PsychQuant/che-word-mcp#43](https://github.com/PsychQuant/che-word-mcp/issues/43)). Latest: v3.12.0 ships programmatic Track Changes generation. + +## Two Modes of Operation + +| Mode | Parameter | Use when | Tool count | +|------|-----------|----------|------------| +| **Direct Mode** | `source_path` | Quick read-only access, no state needed | 18 tools | +| **Session Mode** | `doc_id` | Full read/write with open→edit→save lifecycle | All 218+ tools | + +### Direct Mode (`source_path`) + +Pass `source_path` with the .docx path. No `open_document` needed. Best for quick inspection. + +``` +list_images: { "source_path": "/path/to/file.docx" } +search_text: { "source_path": "/path/to/file.docx", "query": "keyword" } +``` + +### Session Mode (`doc_id`) + +Call `open_document` first, then pass `doc_id`. Required for any edit. + +``` +open_document: { "path": "/path/to/file.docx", "doc_id": "mydoc" } +insert_paragraph: { "doc_id": "mydoc", "text": "Hello" } +save_document: { "doc_id": "mydoc", "path": "/path/to/output.docx" } +close_document: { "doc_id": "mydoc" } +``` + +## Core Workflows + +### Reading Documents + +```text +1. open_document(path: "...") → returns doc_id +2. get_document_text(doc_id: "...") → plain text + OR get_paragraphs(doc_id: "...") → with formatting + indices +3. close_document(doc_id: "...") +``` + +### Creating Documents + +```text +1. create_document(doc_id: "report") +2. insert_paragraph(doc_id, text: "Title", style: "Heading1") + insert_table(doc_id, rows: 3, cols: 4, data: [...]) + insert_image(doc_id, path: "/image.png") +3. save_document(doc_id, path: "/out.docx") +``` + +### Modifying Documents + +```text +1. open_document(path: "...") +2. 
update_paragraph(doc_id, paragraph_index: 0, text: "...") + format_text(doc_id, paragraph_index: 0, bold: true) + replace_text(doc_id, find: "...", replace: "...") +3. save_document(doc_id, path: "...") +``` + +### Track Changes Workflow (v3.12.0+) + +Two paths: **accept/reject existing revisions** OR **generate new revisions programmatically**. + +```text +# Accept/reject existing revisions +1. open_document(path: "...") +2. get_revisions(doc_id) → list with revision ids +3. accept_revision(doc_id, revision_id) / reject_revision(...) + accept_all_revisions(doc_id) / reject_all_revisions(doc_id) + +# Generate new revisions (NEW in v3.12.0) +1. enable_track_changes(doc_id, author: "Reviewer A") +2. insert_text_as_revision(doc_id, paragraph_index, position, text) + delete_text_as_revision(doc_id, paragraph_index, start, end) + move_text_as_revision(doc_id, source/dest) + format_text(doc_id, paragraph_index, bold: true, as_revision: true) + set_paragraph_format(doc_id, paragraph_index, alignment: "center", as_revision: true) +3. save_document(...) +``` + +**Author resolution chain**: explicit `author` arg → `revisions.settings.author` (set at `enable_track_changes`) → `"Unknown"`. + +**Side-effect contract** (important): `as_revision: true` requires track changes to be enabled. If disabled, the call **throws `track_changes_not_enabled`** instead of silently auto-enabling. Call `enable_track_changes` first. + +### Content Controls / SDT (v3.9.0+) + +```text +insert_content_control(doc_id, paragraph_index, control_type: "richText"|"plainText"|"checkBox"|"dropDownList"|"comboBox"|"date"|"picture") +insert_repeating_section(doc_id, ...) +list_content_controls(doc_id) +get_content_control(doc_id, sdt_id) +update_content_control_text(doc_id, sdt_id, text) +replace_content_control_content(doc_id, sdt_id, content) +delete_content_control(doc_id, sdt_id) +``` + +SDT ids are auto-allocated via max+1 pattern (scans body + headers + footers + footnotes + endnotes). 
+ +### Exporting + +```text +export_text(doc_id) → plain text +export_markdown(doc_id) → Markdown +export_revision_summary_markdown(doc_id) → revision summary +export_comment_threads_markdown(doc_id) → comment threads +export_all_images(doc_id, output_dir) +``` + +## Tool Categories (218+ total) + +### Document Lifecycle + +`create_document`, `open_document`, `save_document`, `close_document`, `list_open_documents`, `get_document_info`, `get_document_session_state`, `finalize_document`, `recover_from_autosave`, `revert_to_disk`, `reload_from_disk`, `check_disk_drift`, `checkpoint` + +### Document Properties / Theme / Web Settings + +`get_document_properties`, `set_document_properties`, `get_theme`, `set_theme`, `update_theme_color`, `update_theme_fonts`, `get_web_settings`, `update_web_settings`, `set_language` + +### Content (text + paragraphs) + +`get_text`, `get_document_text`, `get_paragraphs`, `get_paragraph_runs`, `get_text_with_formatting`, `insert_paragraph`, `update_paragraph`, `delete_paragraph`, `replace_text`, `replace_text_batch`, `search_text`, `search_text_batch`, `search_text_with_formatting`, `search_by_formatting`, `list_all_formatted_text` + +### Formatting + +`format_text` (with `as_revision` flag, v3.12.0+), `set_paragraph_format` (with `as_revision` flag), `set_character_spacing`, `set_text_effect`, `set_text_direction`, `set_paragraph_border`, `set_paragraph_shading` + +### Styles (v3.10.0+) + +`list_styles`, `apply_style`, `create_style`, `update_style`, `delete_style` + +### Numbering / Lists (v3.10.0+) + +`insert_bullet_list`, `insert_numbered_list`, `set_list_level`, `set_outline_level` + +### Sections / Page Setup (v3.10.0+) + +`get_section_properties`, `insert_section_break`, `insert_continuous_section_break`, `insert_column_break`, `insert_page_break`, `set_columns`, `set_page_size`, `set_page_margins`, `set_page_orientation`, `set_page_borders`, `set_page_break_before`, `set_keep_lines`, `set_keep_with_next`, `set_widow_orphan`, 
`set_line_numbers`, `get_word_count_by_section` + +### Tables (v3.11.0+) + +`insert_table`, `get_tables`, `update_cell`, `add_row_to_table`, `delete_row_from_table`, `add_column_to_table`, `delete_column_from_table`, `merge_cells`, `set_cell_width`, `set_cell_vertical_alignment`, `set_row_height`, `set_header_row`, `set_table_alignment`, `set_table_style`, `delete_table` + +### Hyperlinks (v3.11.0+) + +`insert_hyperlink`, `update_hyperlink`, `delete_hyperlink`, `list_hyperlinks`, `insert_internal_link`, `insert_cross_reference` + +### Headers & Footers (v3.11.0+, even/odd + section map) + +`add_header`, `update_header`, `get_header`, `delete_header`, `list_headers`, `add_footer`, `update_footer`, `get_footer`, `delete_footer`, `list_footers`, `insert_page_number` + +### Comments + +`insert_comment`, `update_comment`, `delete_comment`, `list_comments`, `list_comment_threads`, `get_comment_thread`, `reply_to_comment`, `resolve_comment`, `sync_extended_comments`, `add_person`, `update_person`, `delete_person`, `list_people` + +### Track Changes / Revisions (v3.12.0 write side) + +`enable_track_changes`, `disable_track_changes`, `get_revisions`, `accept_revision`, `reject_revision`, `accept_all_revisions`, `reject_all_revisions`, `insert_text_as_revision`, `delete_text_as_revision`, `move_text_as_revision`, `export_revision_summary_markdown` + +### Content Controls / SDT (v3.9.0+) + +`insert_content_control`, `list_content_controls`, `get_content_control`, `update_content_control_text`, `replace_content_control_content`, `delete_content_control`, `insert_repeating_section`, `update_repeating_section_item`, `list_repeating_section_items`, `insert_checkbox`, `insert_dropdown`, `insert_text_field`, `list_custom_xml_parts` + +### Images + +`insert_image`, `insert_image_from_path`, `insert_floating_image`, `update_image`, `delete_image`, `list_images`, `set_image_style`, `export_image`, `export_all_images`, `insert_drop_cap` + +### Footnotes / Endnotes / Equations / 
Captions + +`insert_footnote`, `update_footnote`, `delete_footnote`, `get_footnote`, `list_footnotes`, `insert_endnote`, `update_endnote`, `delete_endnote`, `get_endnote`, `list_endnotes`, `insert_equation`, `update_equation`, `delete_equation`, `get_equation`, `list_equations`, `insert_caption`, `update_caption`, `delete_caption`, `get_caption`, `list_captions` + +### Bookmarks / Indexes / TOC / Watermarks + +`insert_bookmark`, `delete_bookmark`, `list_bookmarks`, `insert_index`, `insert_index_entry`, `insert_toc`, `insert_table_of_figures`, `insert_watermark`, `insert_image_watermark`, `remove_watermark`, `get_watermark`, `list_watermarks` + +### Fields + +`insert_date_field`, `insert_page_field`, `insert_sequence_field`, `insert_calculation_field`, `insert_if_field`, `insert_merge_field`, `update_all_fields` + +### Layout / Decoration + +`insert_horizontal_line`, `insert_symbol`, `insert_tab_stop`, `clear_tab_stops` + +### Document Protection + +`protect_document`, `unprotect_document`, `set_document_password`, `remove_document_password`, `restrict_editing_region` + +### Compare / Export + +`compare_documents`, `compare_documents_markdown`, `export_text`, `export_markdown` + +## Tips + +1. **Track Changes is enforced by default.** `create_document` and `open_document` auto-enable track changes via `enforceTrackChangesIfNeeded`. Pass `track_changes: false` to `open_document` if you need to bypass enforcement (e.g., authoring tooling that controls revisions itself). +2. **Always save after modifications.** In-memory until `save_document`. +3. **Use `finalize_document` to save+close in one step** when done. +4. **Use styles for consistency** — `apply_style` over manual formatting. +5. **Check structure first** — `get_document_info` / `get_paragraphs` before editing. +6. **Export for AI processing** — `export_markdown` for easier text analysis. +7. **Direct Mode for read-only inspection** — pass `source_path`, skip the open/close lifecycle. 
+ +## Examples + +### Contract Redline (Track Changes write side, v3.12.0) + +```text +1. open_document("/contracts/draft.docx") +2. enable_track_changes(doc_id, author: "Reviewer A") +3. insert_text_as_revision(doc_id, paragraph_index: 5, position: 32, + text: " (subject to escalation)") +4. delete_text_as_revision(doc_id, paragraph_index: 7, start: 23, end: 31) +5. format_text(doc_id, paragraph_index: 9, bold: true, as_revision: true) +6. save_document(doc_id, path: "/contracts/draft-redlined.docx") + +→ Word opens the file with proper <w:ins>/<w:del>/<w:rPrChange> markup, + attributed to "Reviewer A", reviewable in the Review pane. +``` + +### Multi-author Review + +```text +1. enable_track_changes(doc_id, author: "Author A") +2. insert_text_as_revision(...) # → "Author A" + +3. disable_track_changes(doc_id) +4. enable_track_changes(doc_id, author: "Author B") +5. insert_text_as_revision(..., author: "Author C") # explicit override +6. insert_text_as_revision(...) # falls back to "Author B" +``` + +### Fillable Form with Content Controls + +```text +1. create_document(doc_id: "form") +2. insert_paragraph(doc_id, text: "Application Form", style: "Heading1") +3. insert_paragraph(doc_id, text: "Name: ") +4. insert_content_control(doc_id, paragraph_index: 1, control_type: "plainText", + tag: "applicant_name", placeholder: "Enter name...") +5. insert_paragraph(doc_id, text: "Date: ") +6. insert_content_control(doc_id, paragraph_index: 2, control_type: "date", + tag: "submission_date") +7. save_document(doc_id, path: "/forms/application.docx") +``` + +### Extract & Analyze + +```text +1. open_document("/thesis.docx") +2. get_paragraphs(doc_id) + list_comments(doc_id) + get_revisions(doc_id) +3. export_markdown(doc_id) → analyze +4. 
close_document(doc_id) +``` diff --git a/plugins/macdoc/.claude-plugin/plugin.json b/plugins/macdoc/.claude-plugin/plugin.json new file mode 100644 index 0000000..2a205cd --- /dev/null +++ b/plugins/macdoc/.claude-plugin/plugin.json @@ -0,0 +1,8 @@ +{ + "name": "macdoc", + "description": "macOS 原生文件處理 CLI — 格式轉換、VLM OCR(含 host profile 設定)、SRT 處理", + "version": "1.1.0", + "author": { + "name": "Che Cheng" + } +} diff --git a/plugins/macdoc/CHANGELOG.md b/plugins/macdoc/CHANGELOG.md new file mode 100644 index 0000000..e5b1099 --- /dev/null +++ b/plugins/macdoc/CHANGELOG.md @@ -0,0 +1,17 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +> ⚠ This file was bootstrapped by `changelog-tools:changelog-init` from the +> `plugin.json` description field. Section categorization is best-effort — +> review and refine `Added` / `Changed` / `Fixed` etc. as needed. 
+ +## [Unreleased] + +## [1.1.0] - (date unknown — please fill in) + +### Changed +- macOS 原生文件處理 CLI — 格式轉換、VLM OCR(含 host profile 設定)、SRT 處理 diff --git a/plugins/macdoc/skills/macdoc/SKILL.md b/plugins/macdoc/skills/macdoc/SKILL.md new file mode 100644 index 0000000..a719b8c --- /dev/null +++ b/plugins/macdoc/skills/macdoc/SKILL.md @@ -0,0 +1,332 @@ +--- +name: macdoc +description: | + macOS 原生文件處理 CLI 工具的使用指南。 + 當需要做格式轉換(SRT→HTML、MD→HTML、DOCX→MD)、 + VLM OCR(PDF/圖片→文字)、或 SRT 逐字稿處理時使用。 + 觸發詞:「macdoc」「轉換格式」「OCR」「逐字稿轉HTML」 + 「手寫筆記辨識」「PDF轉文字」 +--- + +# macdoc — macOS 原生文件處理 CLI + +安裝位置:`~/bin/macdoc`(或 `/usr/local/bin/macdoc`) +原始碼:`/Users/che/Developer/macdoc` + +## 子命令總覽 + +| 子命令 | 用途 | 常用場景 | +|--------|------|---------| +| `convert` | 格式轉換 | SRT→HTML、MD→HTML、DOCX→MD | +| `ocr` | VLM OCR | PDF/圖片→文字(手寫筆記辨識) | +| `config` | 設定管理 | AI CLI 工具、OCR host/model 預設值 | +| `pdf` | PDF→LaTeX | 學術 PDF 處理(較少用) | +| `bib` | BibLaTeX→APA | 參考文獻格式轉換 | + +--- + +## convert — 格式轉換 + +```bash +macdoc convert --to [options] +``` + +### 支援格式 + +| --to | 說明 | 範例 | Backend | +|------|------|------|---------| +| `html` | 轉 HTML | SRT→逐字稿網頁、MD→講義網頁 | swift-markdown | +| `md` | 轉 Markdown | DOCX→MD | ooxml-swift | +| `docx` | 轉 Word | MD→DOCX | word-builder-swift | +| `pdf` | 轉 PDF — MD 來源 | MD→PDF(純文字) | textutil | +| `pdf` | 轉 PDF — HTML 來源 | HTML(含 CSS / `@page` / page-break / grid)→ PDF,**完整保留排版** | **playwright Chromium** | +| `json` | 轉 JSON | SRT→結構化 JSON | bib-apa-to-json-swift | + +**HTML→PDF 路徑前置需求**: + +```bash +pip install playwright && playwright install chromium +``` + +完整 CSS / `@page` rule / `page-break-*` / CSS Grid 都正常保留 — **不要**為了避開「textutil 洗 CSS」而繞道用 `chrome --headless` 或 `wkhtmltopdf`,macdoc 已內建 playwright 路徑(#69 實作)。 + +### 常用選項 + +| 選項 | 說明 | +|------|------| +| `--output ` | 輸出檔案路徑 | +| `--full` | 輸出完整 HTML 文件(含 ``),不只是 fragment | +| `--css light` | SRT 轉 HTML 時用淺色主題 | +| `--css dark` | SRT 轉 HTML 時用深色主題 | +| `--hard-breaks` | 軟換行視為硬換行 | +| `--frontmatter` | 包含 
YAML frontmatter | +| `--html-extensions` | MD 中保留 `///` | + +### 常用工作流 + +#### HTML(含完整 CSS / 排版)→ PDF + +```bash +# HTML(含 @page / page-break / grid / 自訂 fonts)→ PDF,CSS 完整保留 +macdoc convert --to pdf styled-quote.html --output quote.pdf +``` + +前置需求:`pip install playwright && playwright install chromium`。 + +**不要繞道**用 `chrome --headless` / `wkhtmltopdf` — macdoc 已內建 playwright 路徑(#69 實作)。 + +#### SRT → 可搜尋的逐字稿 HTML + +```bash +# 1. 轉換 +macdoc convert --to html --css light --full --output transcript.html input.srt + +# 2. 注入搜尋和說話者篩選功能(需要 inject-search.py) +python3 inject-search.py transcript.html --speakers "鄭老師:鄭老師,學生名:學生名" +``` + +`inject-search.py` 位於每個 handout 目錄下。 + +#### MD → 講義 HTML + +```bash +macdoc convert --to html --full --output lecture.html notes.md +``` + +產出的是裸 HTML,需要手動替換 `` 加入 CSS 連結和 lecture-header。 + +#### DOCX → Markdown + +```bash +macdoc convert --to md --output output.md input.docx +``` + +--- + +## ocr — VLM OCR + +```bash +macdoc ocr [options] +``` + +用 Vision Language Model 做 OCR,支援手寫筆記、印刷文件、截圖。 + +### Backend 選擇 + +| Backend | 選項 | 說明 | +|---------|------|------| +| **Ollama**(預設) | `--backend ollama` | 透過 Ollama HTTP API,需要先啟動 Ollama | +| **MLX**(本地) | `--backend mlx` | 用 mlx-swift-lm 本地推理(⚠️ 目前有 upstream bug) | + +### Ollama host 設定(v1.1+) + +**推薦流程**:用 `config ocr` 設定好 host profile,之後就不用每次傳 `--host`。 + +```bash +# 一次性設定(本機或遠端 Kyle) +macdoc config ocr add-host kyle localhost:11435 # 先建 SSH tunnel 到 kyle +macdoc config ocr add-host local localhost:11434 # 本機 Ollama +macdoc config ocr set-default kyle # 設為預設 + +# 之後直接 OCR,不用 --host +macdoc ocr handwritten.pdf +``` + +**`--host` 解析規則**:先當 profile 名查 config,找不到才當原始地址。 + +```bash +macdoc ocr file.pdf # 用 default profile +macdoc ocr file.pdf --host local # 切換到 local profile +macdoc ocr file.pdf --host 192.168.1.50:11434 # 不是 profile,當原始地址 +``` + +### SSH tunnel 到 Kyle + +Kyle 的 Mac Studio(M4 Max/128GB)上有 Ollama + glm-ocr: + +```bash +# 建 SSH tunnel(Kyle's Ollama 預設只聽 localhost) +ssh -fN -L 
11435:localhost:11434 kyle + +# 確認連線 +curl -s http://localhost:11435/api/tags | python3 -m json.tool + +# OCR(如果已設 default=kyle) +macdoc ocr notes.pdf --output notes.md + +# 長時間 OCR 記得用 caffeinate 防電腦睡眠(SSH tunnel 會斷) +caffeinate -i -- macdoc ocr large.pdf --output large.md +``` + +### 可用模型 + +| 模型 | 用途 | --model 值 | +|------|------|-----------| +| **glm-ocr**(預設) | 中文手寫/印刷 OCR | `glm-ocr` | +| qwen3-vl | 多語言 VLM | `qwen3-vl` | +| minicpm-v | 輕量 VLM | `minicpm-v` | + +### 常用範例 + +```bash +# 手寫筆記 PDF(指定頁碼) +macdoc ocr notes.pdf --pages 1-3 --output notes.md + +# 大型 PDF 分段 OCR(避免長時間 tunnel 斷線) +macdoc ocr big.pdf --pages 1-60 --output part1.md +macdoc ocr big.pdf --pages 61-120 --output part2.md +cat part1.md part2.md > full.md + +# 單張圖片 +macdoc ocr screenshot.png + +# 指定模型(覆寫 config default) +macdoc ocr document.pdf --model qwen3-vl +``` + +### 已知問題 + +- **MLX backend crash**:mlx-swift-lm 有 upstream bug(ml-explore/mlx-swift-lm#191),所有 VLM 模型都會 crash。暫時只能用 Ollama。 +- **SSH tunnel 長時間會斷**:連線超過 2-3 小時會 timeout。解法是分段 OCR(`--pages`)或用 `caffeinate -i`。 +- **大頁面**:超過 8000px 的頁面會被自動縮小。 + +### 批次與並行(77 PDF 轉學考實戰累積) + +當要 OCR 數十張 PDF 或數百頁時,單檔順序跑會花太久。下面是實戰整理出來的 pattern。 + +#### 為什麼先拆 PNG 再 OCR + +直接 `macdoc ocr file.pdf` 在某些 PDF 上會漏頁首 — 模型內部的 PDF→image 路徑可能用低解析度。改成預先用 `pdftoppm` 拆 PNG 再逐頁 OCR,單頁可控、可平行、漏頁可重跑。 + +```bash +# Step 1: 拆 PNG (200 DPI 對手寫/印刷都夠) +mkdir -p out +pdftoppm -r 200 -png file.pdf out/page + +# Step 2: 逐 PNG OCR (見下面 xargs -P pattern) + +# Step 3: 合併 +cat out/page-*.md > full.md +``` + +#### Ollama 並發環境變數 + +跑遠端 Ollama(SSH tunnel 連 Kyle 等)時這幾個變數顯著影響吞吐: + +| 變數 | 建議 | 說明 | +|------|------|------| +| `OLLAMA_NUM_PARALLEL` | 4~8 | 同 model 並發請求數;太高會 OOM | +| `OLLAMA_MAX_LOADED_MODELS` | 1 | 單 model 任務維持 1,避免 thrash | +| `OLLAMA_FLASH_ATTENTION` | 1 | Apple Silicon Metal 後端免費加速 | + +設定方式:在 Ollama server 端的 `~/Library/LaunchAgents/com.ollama.server.plist` 加 `EnvironmentVariables`,或啟動前 `export`,然後 `ollama serve`。 + +#### `xargs -P` 並行 pattern + +```bash +# N=4 並行 
(對應 OLLAMA_NUM_PARALLEL=4) +find out -name "page-*.png" | xargs -P 4 -I{} \ + macdoc ocr {} --output "{}.md" --host kyle --model glm-ocr + +# 失敗重試 (找出無 .md 的 png 重跑) +find out -name "page-*.png" | while read png; do + [ -f "${png}.md" ] || echo "$png" +done | xargs -P 2 -I{} macdoc ocr {} --output "{}.md" --host kyle +``` + +#### SSH tunnel 維持 + +長時間批次 OCR(>2 小時)tunnel 會斷。三種策略: + +```bash +# (a) 簡易 — 跑前重建 tunnel,搭配 caffeinate 防 mac sleep +ssh -fN -L 11435:localhost:11434 kyle +caffeinate -i -- xargs -P 4 ... < pages.txt + +# (b) autossh — 自動重連 +brew install autossh +autossh -fN -M 0 -L 11435:localhost:11434 kyle + +# (c) Health check loop — 中途斷 tunnel 自動重建 +while true; do + curl -s --max-time 5 http://localhost:11435/api/tags >/dev/null \ + || ssh -fN -L 11435:localhost:11434 kyle + sleep 60 +done & +``` + +實戰建議:走 (b) autossh + `caffeinate -i`,踩坑成本最低。 + +#### 與 CLI `--parallel` 的整合(roadmap) + +`PsychQuant/macdoc#73` 追蹤把 `--parallel N` 整合進 macdoc CLI(內建 `xargs -P` 邏輯 + 失敗重試 + tunnel health check)。CLI 落地後上面那段 pattern 會被取代成: + +```bash +macdoc ocr-batch out/*.png --parallel 4 --host kyle # roadmap, 尚未實作 +``` + +在那之前,沿用 `xargs -P` 即可。另一個正在被討論的方向是新建 `batch-ocr` plugin 把 PDF→PNG→OCR→merge 整個 pipeline 包成 single command,見 PsychQuant/psychquant-claude-plugins#6。 + +--- + +## config — 設定管理 + +設定檔存在 `~/.config/macdoc/config.json`。 + +### config ai — AI CLI 工具設定 + +```bash +macdoc config ai detect # 偵測本機已安裝的 codex/claude/gemini +macdoc config ai list # 顯示目前設定 +macdoc config ai set transcription codex # 設定 one-shot 轉寫預設後端 +macdoc config ai set agent claude # 設定 agentic 後端 +``` + +### config ocr — OCR host/model 設定(v1.1+) + +| 子命令 | 用途 | +|--------|------| +| `list` | 顯示目前 OCR 設定(含 profile 列表) | +| `add-host <name> <address>` | 新增/更新 host profile | +| `remove-host <name>` | 移除 profile | +| `set-default <name>` | 設定預設 host | +| `set-model <model>` | 設定預設模型(如 glm-ocr) | +| `set-backend <backend>` | 設定預設後端 | + +```bash +# 完整範例:設定 kyle 遠端 + local 兩個 profile +ssh -fN -L 11435:localhost:11434 kyle # 建 tunnel +macdoc config ocr 
add-host kyle localhost:11435 +macdoc config ocr add-host local localhost:11434 +macdoc config ocr set-default kyle +macdoc config ocr set-model glm-ocr + +# 查看 +macdoc config ocr list +# === OCR 設定 === +# backend: ollama +# model: glm-ocr +# default host: kyle → localhost:11435 +# +# === Host Profiles === +# kyle → localhost:11435 ★ +# local → localhost:11434 +``` + +--- + +## 與其他工具的搭配 + +| 場景 | 工具組合 | +|------|---------| +| 手寫筆記 → TikZ 圖 | `macdoc ocr` → 辨識內容 → 寫 TikZ → `xelatex` 編譯 | +| SRT → handout 網頁 | `macdoc convert --to html` → `inject-search.py` | +| PDF 筆記 → PNG | `pdftoppm -png -r 200`(不是 macdoc,是 poppler) | +| 學生作業 .docx → 閱讀 | 用 che-word-mcp 的 `get_document_text`(不需要 macdoc) | + +--- + +## 版本紀錄 + +- **1.1.0**:新增 `config ocr` 子命令組,支援具名 host profile(`--host kyle` 等),預設 host/model 可存 config +- **1.0.0**:初版 From 79d11095127c3fd11733ab52763295e9b4171c76 Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 13:35:52 +0800 Subject: [PATCH 2/6] chore: bump che-pdf-mcp submodule to v0.1.0 (MCP.Metadata build fix) (#112) Refs #112, PsychQuant/che-pdf-mcp#1 --- mcp/che-pdf-mcp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcp/che-pdf-mcp b/mcp/che-pdf-mcp index f496c47..10058a4 160000 --- a/mcp/che-pdf-mcp +++ b/mcp/che-pdf-mcp @@ -1 +1 @@ -Subproject commit f496c47742ed02201316127b712d333409604ee4 +Subproject commit 10058a452bb2580381e3585821211a8698eb742b From 8ded5179b6d8670b482441184956301371508a40 Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 13:49:37 +0800 Subject: [PATCH 3/6] fix: add sha256 + code-signature verification to new MCP wrappers (#112) Push security review flagged supply-chain / unverified remote code execution in the two new auto-download wrappers. Downloads are now verified against the release .sha256 asset (fail-closed on mismatch) and must carry a valid code signature from Team 6W377FS7BS before exec. che-word-mcp wrapper (verbatim migration copy) tracked separately. 
Refs #112 --- plugins/che-pdf-mcp/CHANGELOG.md | 1 + .../che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh | 24 +++++++++++++++++++ plugins/che-pptx-mcp/CHANGELOG.md | 1 + .../che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh | 24 +++++++++++++++++++ 4 files changed, 50 insertions(+) diff --git a/plugins/che-pdf-mcp/CHANGELOG.md b/plugins/che-pdf-mcp/CHANGELOG.md index 18a6939..ff22add 100644 --- a/plugins/che-pdf-mcp/CHANGELOG.md +++ b/plugins/che-pdf-mcp/CHANGELOG.md @@ -9,4 +9,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Added - 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 +- Wrapper 供應鏈驗證:release `.sha256` asset 比對(mismatch 拒裝)+ `codesign` TeamIdentifier `6W377FS7BS` 硬閘(不過即刪除、保留既有 binary)— per #112 push security review。 - `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pdf-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh index 32d23bd..f79a26e 100755 --- a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh +++ b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh @@ -70,6 +70,30 @@ if $NEED_DOWNLOAD; then fi else if curl -sL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null; then + # --- Supply-chain verification (PsychQuant/macdoc#112 security review) --- + # 1. sha256: compare against the release's .sha256 asset when present + # (fail-closed on mismatch; warn-and-continue when asset missing). 
+ EXPECTED_SHA=$(curl -sL --max-time 30 "${URL}.sha256" 2>/dev/null | tr -d '[:space:]' | head -c 64) + if [[ ${#EXPECTED_SHA} -eq 64 ]]; then + ACTUAL_SHA=$(shasum -a 256 "${BINARY}.tmp" | awk '{print $1}') + if [[ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]]; then + rm -f "${BINARY}.tmp" + echo "$BINARY_NAME: ERROR — sha256 mismatch against release asset; refusing to install" >&2 + [[ -x "$BINARY" ]] && exec "$BINARY" "$@" + exit 1 + fi + else + echo "$BINARY_NAME: WARNING — no .sha256 asset found; relying on code-signature check" >&2 + fi + # 2. Code signature: require a valid signature from Team 6W377FS7BS + # (Developer ID, CHE CHENG) before executing anything downloaded. + if ! codesign --verify --strict "${BINARY}.tmp" 2>/dev/null || \ + ! codesign -dvv "${BINARY}.tmp" 2>&1 | grep -q "TeamIdentifier=6W377FS7BS"; then + rm -f "${BINARY}.tmp" + echo "$BINARY_NAME: ERROR — code-signature verification failed (not signed by expected Team ID); refusing to install" >&2 + [[ -x "$BINARY" ]] && exec "$BINARY" "$@" + exit 1 + fi chmod +x "${BINARY}.tmp" mv "${BINARY}.tmp" "$BINARY" echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" diff --git a/plugins/che-pptx-mcp/CHANGELOG.md b/plugins/che-pptx-mcp/CHANGELOG.md index 37a118a..ba0dbd6 100644 --- a/plugins/che-pptx-mcp/CHANGELOG.md +++ b/plugins/che-pptx-mcp/CHANGELOG.md @@ -9,4 +9,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Added - 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 +- Wrapper 供應鏈驗證:release `.sha256` asset 比對(mismatch 拒裝)+ `codesign` TeamIdentifier `6W377FS7BS` 硬閘(不過即刪除、保留既有 binary)— per #112 push security review。 - `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pptx-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh index 8373117..07a6f4e 100755 --- a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh +++ b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh @@ -70,6 +70,30 @@ if $NEED_DOWNLOAD; then fi else if curl -sL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null; then + # --- Supply-chain verification (PsychQuant/macdoc#112 security review) --- + # 1. sha256: compare against the release's .sha256 asset when present + # (fail-closed on mismatch; warn-and-continue when asset missing). + EXPECTED_SHA=$(curl -sL --max-time 30 "${URL}.sha256" 2>/dev/null | tr -d '[:space:]' | head -c 64) + if [[ ${#EXPECTED_SHA} -eq 64 ]]; then + ACTUAL_SHA=$(shasum -a 256 "${BINARY}.tmp" | awk '{print $1}') + if [[ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]]; then + rm -f "${BINARY}.tmp" + echo "$BINARY_NAME: ERROR — sha256 mismatch against release asset; refusing to install" >&2 + [[ -x "$BINARY" ]] && exec "$BINARY" "$@" + exit 1 + fi + else + echo "$BINARY_NAME: WARNING — no .sha256 asset found; relying on code-signature check" >&2 + fi + # 2. Code signature: require a valid signature from Team 6W377FS7BS + # (Developer ID, CHE CHENG) before executing anything downloaded. + if ! codesign --verify --strict "${BINARY}.tmp" 2>/dev/null || \ + ! 
codesign -dvv "${BINARY}.tmp" 2>&1 | grep -q "TeamIdentifier=6W377FS7BS"; then + rm -f "${BINARY}.tmp" + echo "$BINARY_NAME: ERROR — code-signature verification failed (not signed by expected Team ID); refusing to install" >&2 + [[ -x "$BINARY" ]] && exec "$BINARY" "$@" + exit 1 + fi chmod +x "${BINARY}.tmp" mv "${BINARY}.tmp" "$BINARY" echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" From 813bae4a5ded43fc628ea6c807ce0c3e4f70d008 Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 14:38:31 +0800 Subject: [PATCH 4/6] fix: sound supply-chain verification in all 3 MCP wrappers (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify round 1 (5 Opus reviewers + Codex) found the codesign gate bypassable: --verify --strict accepts ad-hoc signatures and the TeamIdentifier grep matches the attacker-controlled Identifier field (empirically reproduced by 3 independent parties). Replaced with a requirement-based check pinning the Apple chain + Team OU 6W377FS7BS. Also per R1 findings: - sha256 verification now MANDATORY fail-closed (was warn-and-continue) - pinned versions no longer fall back to releases/latest; sidecar records the resolved version from the effective download URL - API-free direct-download endpoints (unauthenticated api.github.com rate limit made the wrapper fragile — surfaced by E2E) - mktemp unique temp file + curl -f --proto '=https' --tlsv1.2 - che-word-mcp wrapper now uses the same hardened template (R1 HIGH-2; version intentionally NOT bumped — plugin.json version pins the binary release tag; decoupling tracked in #116) - CHANGELOG/README/CLAUDE.md wording corrected to match actual posture (DA merge precondition: no false security documentation) Upstream: CheWordMCP v3.20.0 release asset was ad-hoc signed — re-signed with Developer ID + notarized, assets replaced (che-word-mcp#165). 
Refs #112 --- CLAUDE.md | 2 +- README.md | 2 +- plugins/che-pdf-mcp/CHANGELOG.md | 2 +- .../che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh | 131 +++++++++--------- plugins/che-pptx-mcp/CHANGELOG.md | 2 +- .../che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh | 131 +++++++++--------- plugins/che-word-mcp/CHANGELOG.md | 6 + .../che-word-mcp/bin/che-word-mcp-wrapper.sh | 110 +++++++++------ 8 files changed, 214 insertions(+), 172 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 85af657..bb17c6b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,7 +35,7 @@ This file provides guidance to Claude Code when working with code in this reposi **macdoc** 是一個原生 macOS 文件處理工具集,專注於文件格式解析、轉換和 OCR 功能。整個專案使用 Swift 開發,充分利用 Apple 平台的原生能力。 -本 repo 同時是 **Claude Code plugin marketplace**(`.claude-plugin/marketplace.json` + `plugins/`,2026-07 起,#112):發布 `che-word-mcp`、`che-pdf-mcp`、`che-pptx-mcp`、`macdoc` 四個 plugins,使用者以 `claude plugin marketplace add PsychQuant/macdoc` 安裝。注意 `plugins/`(plugin shells,正常入版控)與 `packages/`(gitignored 本地套件)的差異;MCP shells 的 wrapper 從各 binary repo 的 GitHub Releases 自動下載 signed binary。發布新版時同步 bump `plugins//.claude-plugin/plugin.json` 與 `.claude-plugin/marketplace.json` 兩處版本。 +本 repo 同時是 **Claude Code plugin marketplace**(`.claude-plugin/marketplace.json` + `plugins/`,2026-07 起,#112):發布 `che-word-mcp`、`che-pdf-mcp`、`che-pptx-mcp`、`macdoc` 四個 plugins,使用者以 `claude plugin marketplace add PsychQuant/macdoc` 安裝。注意 `plugins/`(plugin shells,正常入版控)與 `packages/`(gitignored 本地套件)的差異;MCP shells 的 wrapper 從各 binary repo 的 GitHub Releases 自動下載 binary,安裝前強制驗證 sha256 + Developer ID 簽章鏈(requirement-based codesign,Team `6W377FS7BS`)。發布新版時同步 bump `plugins//.claude-plugin/plugin.json` 與 `.claude-plugin/marketplace.json` 兩處版本。 ## Project Structure diff --git a/README.md b/README.md index e4d8aae..40e6dcb 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ claude plugin marketplace add PsychQuant/macdoc claude plugin install che-word-mcp@macdoc # 或 che-pdf-mcp / che-pptx-mcp / macdoc ``` -MCP 
plugins 的 wrapper 會自動從各 repo 的 GitHub Releases 下載 signed + notarized universal binary。 +MCP plugins 的 wrapper 會自動從各 repo 的 GitHub Releases 下載 universal binary,並在安裝前**強制驗證** sha256 與 Developer ID 簽章鏈(Team `6W377FS7BS`);驗證不過即拒裝。 > 遷移註記:`che-word-mcp` 與 `macdoc` 兩個 plugins 原先發布於 `psychquant-claude-plugins` marketplace,自 2026-07 起以本 marketplace 為準。 diff --git a/plugins/che-pdf-mcp/CHANGELOG.md b/plugins/che-pdf-mcp/CHANGELOG.md index ff22add..da00f70 100644 --- a/plugins/che-pdf-mcp/CHANGELOG.md +++ b/plugins/che-pdf-mcp/CHANGELOG.md @@ -9,5 +9,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ### Added - 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 -- Wrapper 供應鏈驗證:release `.sha256` asset 比對(mismatch 拒裝)+ `codesign` TeamIdentifier `6W377FS7BS` 硬閘(不過即刪除、保留既有 binary)— per #112 push security review。 +- Wrapper 供應鏈驗證(#112 security review R1+R2):sha256 asset 比對為**強制**(缺失/格式錯/mismatch 均拒裝,integrity gate)+ requirement-based `codesign` 驗證鏈定 Apple anchor + Team OU `6W377FS7BS`(authenticity gate — 取代可被 Identifier 欄位偽造的 grep 形式)+ pinned version 不 fallback latest + `curl -f --proto '=https'` + mktemp 唯一暫存檔。驗證失敗一律保留既有 binary(fail-to-known-good)。 - `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pdf-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh index f79a26e..ee76f8e 100755 --- a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh +++ b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh @@ -5,9 +5,20 @@ # - Reads desired version from plugin.json (plugin's intended binary version) # - Compares against ~/bin/.ChePDFMCP.version sidecar # - Re-downloads when plugin has been updated but binary is stale -# - Atomic file swap (.tmp + mv) so partial downloads never break things -# - Falls back to releases/latest if plugin.json unreadable or pinned tag missing +# - Unique temp file (mktemp, 
same fs) + atomic mv so partial downloads never break things +# - Pinned version does NOT fall back to releases/latest (supply-chain pinning); +# latest is used only when plugin.json carries no version # +# Supply-chain verification (PsychQuant/macdoc#112 security review R1+R2): +# - sha256 (MANDATORY): release must ship ChePDFMCP.sha256; missing/malformed/ +# mismatching asset refuses install (fail-closed integrity gate) +# - Code signature (AUTHENTICITY): requirement-based codesign check pins the +# Apple chain + Team OU 6W377FS7BS. NOTE: a grep on `codesign -dvv` output is +# spoofable via the attacker-controlled Identifier field, and --verify alone +# accepts ad-hoc signatures (empirically reproduced in #112 verify round 1) — +# only the -R requirement form is sound. +# - On any verification failure: keep + exec the existing binary if present +# (fail-to-known-good), else exit 1. set -u @@ -16,20 +27,32 @@ BINARY_NAME="ChePDFMCP" INSTALL_DIR="$HOME/bin" BINARY="$INSTALL_DIR/$BINARY_NAME" VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version" +SCRIPT_ARGS=("$@") # Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT # which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh. PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" -# Read desired version from plugin.json (empty string on any failure → fallback to "latest"). +run_existing_or_die() { + # $1 = error message. Fail-to-known-good: prefer the already-installed + # binary over aborting the MCP server spawn entirely. + echo "$BINARY_NAME: ERROR — $1" >&2 + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: keeping existing binary" >&2 + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} + fi + exit 1 +} + +# Read desired version from plugin.json (empty string on any failure → latest). 
DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ | head -1 | cut -d'"' -f4 || true) fi -# Read currently installed version from sidecar (empty string if file missing/unreadable). +# Read currently installed version from sidecar (empty string if missing). INSTALLED_VERSION="" [[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true) @@ -48,66 +71,48 @@ if $NEED_DOWNLOAD; then echo "$BINARY_NAME: $REASON — downloading from $REPO..." >&2 mkdir -p "$INSTALL_DIR" - # Try pinned tag first, then fall back to latest release. - URL="" - for API_URL in \ - "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \ - "https://api.github.com/repos/$REPO/releases/latest" - do - [[ -z "$API_URL" ]] && continue - URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \ - | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \ - | sed 's/.*"\(https[^"]*\)".*/\1/') - [[ -n "$URL" ]] && break - done - - if [[ -z "$URL" ]]; then - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — no download URL found at $REPO. Install manually: https://github.com/$REPO/releases" >&2 - exit 1 - fi + # Resolve release via the API-free direct-download endpoints (unauthenticated + # api.github.com is rate-limited to 60 req/hr per IP and fails closed here; + # the /releases/download/ redirect endpoints have no such limit). + # Pinned version does NOT fall back to latest — a missing pinned tag is a + # release-channel fault, not a downgrade licence. 
+ if [[ -n "$DESIRED_VERSION" ]]; then + URL="https://github.com/$REPO/releases/download/v$DESIRED_VERSION/$BINARY_NAME" + TARGET_DESC="v$DESIRED_VERSION" else - if curl -sL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null; then - # --- Supply-chain verification (PsychQuant/macdoc#112 security review) --- - # 1. sha256: compare against the release's .sha256 asset when present - # (fail-closed on mismatch; warn-and-continue when asset missing). - EXPECTED_SHA=$(curl -sL --max-time 30 "${URL}.sha256" 2>/dev/null | tr -d '[:space:]' | head -c 64) - if [[ ${#EXPECTED_SHA} -eq 64 ]]; then - ACTUAL_SHA=$(shasum -a 256 "${BINARY}.tmp" | awk '{print $1}') - if [[ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]]; then - rm -f "${BINARY}.tmp" - echo "$BINARY_NAME: ERROR — sha256 mismatch against release asset; refusing to install" >&2 - [[ -x "$BINARY" ]] && exec "$BINARY" "$@" - exit 1 - fi - else - echo "$BINARY_NAME: WARNING — no .sha256 asset found; relying on code-signature check" >&2 - fi - # 2. Code signature: require a valid signature from Team 6W377FS7BS - # (Developer ID, CHE CHENG) before executing anything downloaded. - if ! codesign --verify --strict "${BINARY}.tmp" 2>/dev/null || \ - ! 
codesign -dvv "${BINARY}.tmp" 2>&1 | grep -q "TeamIdentifier=6W377FS7BS"; then - rm -f "${BINARY}.tmp" - echo "$BINARY_NAME: ERROR — code-signature verification failed (not signed by expected Team ID); refusing to install" >&2 - [[ -x "$BINARY" ]] && exec "$BINARY" "$@" - exit 1 - fi - chmod +x "${BINARY}.tmp" - mv "${BINARY}.tmp" "$BINARY" - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2 - else - rm -f "${BINARY}.tmp" 2>/dev/null - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — download failed" >&2 - exit 1 - fi - fi + URL="https://github.com/$REPO/releases/latest/download/$BINARY_NAME" + TARGET_DESC="latest" fi + + TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" + trap 'rm -f "$TMP_FILE"' EXIT + + # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, + # which is the authoritative resolved version (needed for the latest path). + EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" + RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') + + # 1. sha256 — mandatory fail-closed integrity gate. 
+ EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ + | head -1 | awk '{print $1}') + [[ "$EXPECTED_SHA" =~ ^[0-9a-fA-F]{64}$ ]] \ + || run_existing_or_die "missing/malformed .sha256 release asset — refusing to install" + ACTUAL_SHA=$(shasum -a 256 "$TMP_FILE" | awk '{print $1}') + [[ "$ACTUAL_SHA" == "$EXPECTED_SHA" ]] \ + || run_existing_or_die "sha256 mismatch against release asset — refusing to install" + + # 2. Code signature — requirement-based authenticity gate (see header). + codesign --verify --strict \ + -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$TMP_FILE" 2>/dev/null \ + || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + + chmod +x "$TMP_FILE" + mv "$TMP_FILE" "$BINARY" + trap - EXIT + echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 fi -exec "$BINARY" "$@" +exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-pptx-mcp/CHANGELOG.md b/plugins/che-pptx-mcp/CHANGELOG.md index ba0dbd6..a9184e7 100644 --- a/plugins/che-pptx-mcp/CHANGELOG.md +++ b/plugins/che-pptx-mcp/CHANGELOG.md @@ -9,5 +9,5 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). 
### Added - 首次 marketplace 發布(PsychQuant/macdoc marketplace,Refs PsychQuant/macdoc#112)。 -- Wrapper 供應鏈驗證:release `.sha256` asset 比對(mismatch 拒裝)+ `codesign` TeamIdentifier `6W377FS7BS` 硬閘(不過即刪除、保留既有 binary)— per #112 push security review。 +- Wrapper 供應鏈驗證(#112 security review R1+R2):sha256 asset 比對為**強制**(缺失/格式錯/mismatch 均拒裝,integrity gate)+ requirement-based `codesign` 驗證鏈定 Apple anchor + Team OU `6W377FS7BS`(authenticity gate — 取代可被 Identifier 欄位偽造的 grep 形式)+ pinned version 不 fallback latest + `curl -f --proto '=https'` + mktemp 唯一暫存檔。驗證失敗一律保留既有 binary(fail-to-known-good)。 - `.mcp.json` + version-aware auto-download wrapper(自 `PsychQuant/che-pptx-mcp` GitHub Releases 下載 signed + notarized universal binary)。 diff --git a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh index 07a6f4e..f041f40 100755 --- a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh +++ b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh @@ -5,9 +5,20 @@ # - Reads desired version from plugin.json (plugin's intended binary version) # - Compares against ~/bin/.ChePPTXMCP.version sidecar # - Re-downloads when plugin has been updated but binary is stale -# - Atomic file swap (.tmp + mv) so partial downloads never break things -# - Falls back to releases/latest if plugin.json unreadable or pinned tag missing +# - Unique temp file (mktemp, same fs) + atomic mv so partial downloads never break things +# - Pinned version does NOT fall back to releases/latest (supply-chain pinning); +# latest is used only when plugin.json carries no version # +# Supply-chain verification (PsychQuant/macdoc#112 security review R1+R2): +# - sha256 (MANDATORY): release must ship ChePPTXMCP.sha256; missing/malformed/ +# mismatching asset refuses install (fail-closed integrity gate) +# - Code signature (AUTHENTICITY): requirement-based codesign check pins the +# Apple chain + Team OU 6W377FS7BS. 
NOTE: a grep on `codesign -dvv` output is +# spoofable via the attacker-controlled Identifier field, and --verify alone +# accepts ad-hoc signatures (empirically reproduced in #112 verify round 1) — +# only the -R requirement form is sound. +# - On any verification failure: keep + exec the existing binary if present +# (fail-to-known-good), else exit 1. set -u @@ -16,20 +27,32 @@ BINARY_NAME="ChePPTXMCP" INSTALL_DIR="$HOME/bin" BINARY="$INSTALL_DIR/$BINARY_NAME" VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version" +SCRIPT_ARGS=("$@") # Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT # which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh. PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" -# Read desired version from plugin.json (empty string on any failure → fallback to "latest"). +run_existing_or_die() { + # $1 = error message. Fail-to-known-good: prefer the already-installed + # binary over aborting the MCP server spawn entirely. + echo "$BINARY_NAME: ERROR — $1" >&2 + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: keeping existing binary" >&2 + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} + fi + exit 1 +} + +# Read desired version from plugin.json (empty string on any failure → latest). DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ | head -1 | cut -d'"' -f4 || true) fi -# Read currently installed version from sidecar (empty string if file missing/unreadable). +# Read currently installed version from sidecar (empty string if missing). INSTALLED_VERSION="" [[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true) @@ -48,66 +71,48 @@ if $NEED_DOWNLOAD; then echo "$BINARY_NAME: $REASON — downloading from $REPO..." 
>&2 mkdir -p "$INSTALL_DIR" - # Try pinned tag first, then fall back to latest release. - URL="" - for API_URL in \ - "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \ - "https://api.github.com/repos/$REPO/releases/latest" - do - [[ -z "$API_URL" ]] && continue - URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \ - | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \ - | sed 's/.*"\(https[^"]*\)".*/\1/') - [[ -n "$URL" ]] && break - done - - if [[ -z "$URL" ]]; then - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — no download URL found at $REPO. Install manually: https://github.com/$REPO/releases" >&2 - exit 1 - fi + # Resolve release via the API-free direct-download endpoints (unauthenticated + # api.github.com is rate-limited to 60 req/hr per IP and fails closed here; + # the /releases/download/ redirect endpoints have no such limit). + # Pinned version does NOT fall back to latest — a missing pinned tag is a + # release-channel fault, not a downgrade licence. + if [[ -n "$DESIRED_VERSION" ]]; then + URL="https://github.com/$REPO/releases/download/v$DESIRED_VERSION/$BINARY_NAME" + TARGET_DESC="v$DESIRED_VERSION" else - if curl -sL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null; then - # --- Supply-chain verification (PsychQuant/macdoc#112 security review) --- - # 1. sha256: compare against the release's .sha256 asset when present - # (fail-closed on mismatch; warn-and-continue when asset missing). 
- EXPECTED_SHA=$(curl -sL --max-time 30 "${URL}.sha256" 2>/dev/null | tr -d '[:space:]' | head -c 64) - if [[ ${#EXPECTED_SHA} -eq 64 ]]; then - ACTUAL_SHA=$(shasum -a 256 "${BINARY}.tmp" | awk '{print $1}') - if [[ "$ACTUAL_SHA" != "$EXPECTED_SHA" ]]; then - rm -f "${BINARY}.tmp" - echo "$BINARY_NAME: ERROR — sha256 mismatch against release asset; refusing to install" >&2 - [[ -x "$BINARY" ]] && exec "$BINARY" "$@" - exit 1 - fi - else - echo "$BINARY_NAME: WARNING — no .sha256 asset found; relying on code-signature check" >&2 - fi - # 2. Code signature: require a valid signature from Team 6W377FS7BS - # (Developer ID, CHE CHENG) before executing anything downloaded. - if ! codesign --verify --strict "${BINARY}.tmp" 2>/dev/null || \ - ! codesign -dvv "${BINARY}.tmp" 2>&1 | grep -q "TeamIdentifier=6W377FS7BS"; then - rm -f "${BINARY}.tmp" - echo "$BINARY_NAME: ERROR — code-signature verification failed (not signed by expected Team ID); refusing to install" >&2 - [[ -x "$BINARY" ]] && exec "$BINARY" "$@" - exit 1 - fi - chmod +x "${BINARY}.tmp" - mv "${BINARY}.tmp" "$BINARY" - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2 - else - rm -f "${BINARY}.tmp" 2>/dev/null - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — download failed" >&2 - exit 1 - fi - fi + URL="https://github.com/$REPO/releases/latest/download/$BINARY_NAME" + TARGET_DESC="latest" fi + + TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" + trap 'rm -f "$TMP_FILE"' EXIT + + # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, + # which is the authoritative resolved version (needed for the latest path). 
+ EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" + RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') + + # 1. sha256 — mandatory fail-closed integrity gate. + EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ + | head -1 | awk '{print $1}') + [[ "$EXPECTED_SHA" =~ ^[0-9a-fA-F]{64}$ ]] \ + || run_existing_or_die "missing/malformed .sha256 release asset — refusing to install" + ACTUAL_SHA=$(shasum -a 256 "$TMP_FILE" | awk '{print $1}') + [[ "$ACTUAL_SHA" == "$EXPECTED_SHA" ]] \ + || run_existing_or_die "sha256 mismatch against release asset — refusing to install" + + # 2. Code signature — requirement-based authenticity gate (see header). + codesign --verify --strict \ + -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$TMP_FILE" 2>/dev/null \ + || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + + chmod +x "$TMP_FILE" + mv "$TMP_FILE" "$BINARY" + trap - EXIT + echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 fi -exec "$BINARY" "$@" +exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-word-mcp/CHANGELOG.md b/plugins/che-word-mcp/CHANGELOG.md index 3339897..202905d 100644 --- a/plugins/che-word-mcp/CHANGELOG.md +++ b/plugins/che-word-mcp/CHANGELOG.md @@ -9,6 +9,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 > `plugin.json` description field. 
Section categorization is best-effort — > review and refine `Added` / `Changed` / `Fixed` etc. as needed. +## [Unreleased — macdoc marketplace shell] + +### Security + +- Wrapper 改用硬化模板(同 che-pdf-mcp / che-pptx-mcp v0.1.0):強制 sha256 + requirement-based codesign(Team OU 6W377FS7BS)+ pinned version 不 fallback latest(PsychQuant/macdoc#112 verify R1 HIGH-2)。**版本刻意不 bump**:wrapper 以 plugin.json version 挑 binary release tag(v3.20.0),bump 會使下載目標指向不存在的 tag;shell/binary 版本解耦見 PsychQuant/macdoc#116。 + ## [Unreleased] ## [3.20.0] - 2026-05-04 diff --git a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh index 2d08728..1d7dcc4 100755 --- a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh +++ b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh @@ -5,12 +5,20 @@ # - Reads desired version from plugin.json (plugin's intended binary version) # - Compares against ~/bin/.CheWordMCP.version sidecar # - Re-downloads when plugin has been updated but binary is stale -# - Atomic file swap (.tmp + mv) so partial downloads never break things -# - Falls back to releases/latest if plugin.json unreadable or pinned tag missing +# - Unique temp file (mktemp, same fs) + atomic mv so partial downloads never break things +# - Pinned version does NOT fall back to releases/latest (supply-chain pinning); +# latest is used only when plugin.json carries no version # -# Fixed in 2.0.1: -# - No version check → now checks plugin.json version against installed sidecar -# - REPO was obsolete fork kiki830621 → now PsychQuant +# Supply-chain verification (PsychQuant/macdoc#112 security review R1+R2): +# - sha256 (MANDATORY): release must ship CheWordMCP.sha256; missing/malformed/ +# mismatching asset refuses install (fail-closed integrity gate) +# - Code signature (AUTHENTICITY): requirement-based codesign check pins the +# Apple chain + Team OU 6W377FS7BS. 
NOTE: a grep on `codesign -dvv` output is +# spoofable via the attacker-controlled Identifier field, and --verify alone +# accepts ad-hoc signatures (empirically reproduced in #112 verify round 1) — +# only the -R requirement form is sound. +# - On any verification failure: keep + exec the existing binary if present +# (fail-to-known-good), else exit 1. set -u @@ -19,20 +27,32 @@ BINARY_NAME="CheWordMCP" INSTALL_DIR="$HOME/bin" BINARY="$INSTALL_DIR/$BINARY_NAME" VERSION_FILE="$INSTALL_DIR/.${BINARY_NAME}.version" +SCRIPT_ARGS=("$@") # Locate plugin root via wrapper's own path (more reliable than $CLAUDE_PLUGIN_ROOT # which isn't guaranteed in MCP spawn env). Wrapper lives at PLUGIN_ROOT/bin/*.sh. PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" -# Read desired version from plugin.json (empty string on any failure → fallback to "latest"). +run_existing_or_die() { + # $1 = error message. Fail-to-known-good: prefer the already-installed + # binary over aborting the MCP server spawn entirely. + echo "$BINARY_NAME: ERROR — $1" >&2 + if [[ -x "$BINARY" ]]; then + echo "$BINARY_NAME: keeping existing binary" >&2 + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} + fi + exit 1 +} + +# Read desired version from plugin.json (empty string on any failure → latest). DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ | head -1 | cut -d'"' -f4 || true) fi -# Read currently installed version from sidecar (empty string if file missing/unreadable). +# Read currently installed version from sidecar (empty string if missing). INSTALLED_VERSION="" [[ -f "$VERSION_FILE" ]] && INSTALLED_VERSION=$(tr -d '[:space:]' < "$VERSION_FILE" 2>/dev/null || true) @@ -51,42 +71,48 @@ if $NEED_DOWNLOAD; then echo "$BINARY_NAME: $REASON — downloading from $REPO..." 
>&2 mkdir -p "$INSTALL_DIR" - # Try pinned tag first, then fall back to latest release. - URL="" - for API_URL in \ - "${DESIRED_VERSION:+https://api.github.com/repos/$REPO/releases/tags/v$DESIRED_VERSION}" \ - "https://api.github.com/repos/$REPO/releases/latest" - do - [[ -z "$API_URL" ]] && continue - URL=$(curl -sL --max-time 30 "$API_URL" 2>/dev/null \ - | grep '"browser_download_url"' | grep "/$BINARY_NAME\"" | head -1 \ - | sed 's/.*"\(https[^"]*\)".*/\1/') - [[ -n "$URL" ]] && break - done - - if [[ -z "$URL" ]]; then - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — no download URL found, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — no download URL found at $REPO. Install manually: https://github.com/$REPO/releases" >&2 - exit 1 - fi + # Resolve release via the API-free direct-download endpoints (unauthenticated + # api.github.com is rate-limited to 60 req/hr per IP and fails closed here; + # the /releases/download/ redirect endpoints have no such limit). + # Pinned version does NOT fall back to latest — a missing pinned tag is a + # release-channel fault, not a downgrade licence. 
+ if [[ -n "$DESIRED_VERSION" ]]; then + URL="https://github.com/$REPO/releases/download/v$DESIRED_VERSION/$BINARY_NAME" + TARGET_DESC="v$DESIRED_VERSION" else - if curl -sL --max-time 300 "$URL" -o "${BINARY}.tmp" 2>/dev/null; then - chmod +x "${BINARY}.tmp" - mv "${BINARY}.tmp" "$BINARY" - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${DESIRED_VERSION:-latest}" >&2 - else - rm -f "${BINARY}.tmp" 2>/dev/null - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: WARNING — download failed, keeping existing binary" >&2 - else - echo "$BINARY_NAME: ERROR — download failed" >&2 - exit 1 - fi - fi + URL="https://github.com/$REPO/releases/latest/download/$BINARY_NAME" + TARGET_DESC="latest" fi + + TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" + trap 'rm -f "$TMP_FILE"' EXIT + + # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, + # which is the authoritative resolved version (needed for the latest path). + EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" + RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') + + # 1. sha256 — mandatory fail-closed integrity gate. + EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ + | head -1 | awk '{print $1}') + [[ "$EXPECTED_SHA" =~ ^[0-9a-fA-F]{64}$ ]] \ + || run_existing_or_die "missing/malformed .sha256 release asset — refusing to install" + ACTUAL_SHA=$(shasum -a 256 "$TMP_FILE" | awk '{print $1}') + [[ "$ACTUAL_SHA" == "$EXPECTED_SHA" ]] \ + || run_existing_or_die "sha256 mismatch against release asset — refusing to install" + + # 2. 
Code signature — requirement-based authenticity gate (see header). + codesign --verify --strict \ + -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$TMP_FILE" 2>/dev/null \ + || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + + chmod +x "$TMP_FILE" + mv "$TMP_FILE" "$BINARY" + trap - EXIT + echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 fi -exec "$BINARY" "$@" +exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} From 8330daaf99ae61989818c28f49cca44634ab81d7 Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 14:52:14 +0800 Subject: [PATCH 5/6] fix: pin Developer ID cert type + wrapper polish from verify R2 (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit R2 in-scope fixes (security M-1 / DA-1 converged + logic L1/L2 + regression LOWs): - codesign requirement now includes Developer ID marker OIDs (intermediate 6.2.6 + leaf 6.1.13) — without them any Apple cert type in the team OU (e.g. Apple Development) passed, contradicting the documented 'Developer ID' guarantee. Verified: rejects ad-hoc PoC, accepts all 3 shipping release binaries. 
- run_existing_or_die now removes TMP_FILE before exec (EXIT trap does not fire across exec — rejected downloads no longer leak) - dead RESOLVED_VERSION extraction removed (CDN redirect masks the tag; sidecar records the pinned version) - che-word-mcp CHANGELOG dual-Unreleased headings merged Refs #112 --- plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh | 17 +++++++++-------- .../che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh | 17 +++++++++-------- plugins/che-word-mcp/CHANGELOG.md | 2 -- .../che-word-mcp/bin/che-word-mcp-wrapper.sh | 17 +++++++++-------- 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh index ee76f8e..e107084 100755 --- a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh +++ b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh @@ -38,6 +38,7 @@ run_existing_or_die() { # $1 = error message. Fail-to-known-good: prefer the already-installed # binary over aborting the MCP server spawn entirely. echo "$BINARY_NAME: ERROR — $1" >&2 + rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here if [[ -x "$BINARY" ]]; then echo "$BINARY_NAME: keeping existing binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} @@ -87,11 +88,11 @@ if $NEED_DOWNLOAD; then TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" trap 'rm -f "$TMP_FILE"' EXIT - # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, - # which is the authoritative resolved version (needed for the latest path). - EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + # NOTE: successful downloads redirect to the release-assets CDN, so the + # effective URL does NOT expose the tag — the sidecar records the pinned + # version (all shipped plugins pin one; the latest path records "unknown"). 
+ curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" 2>/dev/null \ || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" - RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') # 1. sha256 — mandatory fail-closed integrity gate. EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ @@ -104,15 +105,15 @@ if $NEED_DOWNLOAD; then # 2. Code signature — requirement-based authenticity gate (see header). codesign --verify --strict \ - -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ "$TMP_FILE" 2>/dev/null \ - || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" chmod +x "$TMP_FILE" mv "$TMP_FILE" "$BINARY" trap - EXIT - echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh index f041f40..d4e5400 100755 --- a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh +++ b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh @@ -38,6 +38,7 @@ run_existing_or_die() { # $1 = error message. 
Fail-to-known-good: prefer the already-installed # binary over aborting the MCP server spawn entirely. echo "$BINARY_NAME: ERROR — $1" >&2 + rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here if [[ -x "$BINARY" ]]; then echo "$BINARY_NAME: keeping existing binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} @@ -87,11 +88,11 @@ if $NEED_DOWNLOAD; then TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" trap 'rm -f "$TMP_FILE"' EXIT - # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, - # which is the authoritative resolved version (needed for the latest path). - EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + # NOTE: successful downloads redirect to the release-assets CDN, so the + # effective URL does NOT expose the tag — the sidecar records the pinned + # version (all shipped plugins pin one; the latest path records "unknown"). + curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" 2>/dev/null \ || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" - RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') # 1. sha256 — mandatory fail-closed integrity gate. EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ @@ -104,15 +105,15 @@ if $NEED_DOWNLOAD; then # 2. Code signature — requirement-based authenticity gate (see header). 
codesign --verify --strict \ - -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ "$TMP_FILE" 2>/dev/null \ - || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" chmod +x "$TMP_FILE" mv "$TMP_FILE" "$BINARY" trap - EXIT - echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-word-mcp/CHANGELOG.md b/plugins/che-word-mcp/CHANGELOG.md index 202905d..cde2bf7 100644 --- a/plugins/che-word-mcp/CHANGELOG.md +++ b/plugins/che-word-mcp/CHANGELOG.md @@ -15,8 +15,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Wrapper 改用硬化模板(同 che-pdf-mcp / che-pptx-mcp v0.1.0):強制 sha256 + requirement-based codesign(Team OU 6W377FS7BS)+ pinned version 不 fallback latest(PsychQuant/macdoc#112 verify R1 HIGH-2)。**版本刻意不 bump**:wrapper 以 plugin.json version 挑 binary release tag(v3.20.0),bump 會使下載目標指向不存在的 tag;shell/binary 版本解耦見 PsychQuant/macdoc#116。 -## [Unreleased] - ## [3.20.0] - 2026-05-04 ### Added diff --git a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh index 1d7dcc4..8b99543 100755 --- a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh +++ b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh @@ -38,6 +38,7 @@ 
run_existing_or_die() { # $1 = error message. Fail-to-known-good: prefer the already-installed # binary over aborting the MCP server spawn entirely. echo "$BINARY_NAME: ERROR — $1" >&2 + rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here if [[ -x "$BINARY" ]]; then echo "$BINARY_NAME: keeping existing binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} @@ -87,11 +88,11 @@ if $NEED_DOWNLOAD; then TMP_FILE=$(mktemp "$INSTALL_DIR/.${BINARY_NAME}.download.XXXXXX") || run_existing_or_die "mktemp failed" trap 'rm -f "$TMP_FILE"' EXIT - # -w url_effective: after redirects the final URL contains /download/vX.Y.Z/, - # which is the authoritative resolved version (needed for the latest path). - EFFECTIVE_URL=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" -w '%{url_effective}' 2>/dev/null) \ + # NOTE: successful downloads redirect to the release-assets CDN, so the + # effective URL does NOT expose the tag — the sidecar records the pinned + # version (all shipped plugins pin one; the latest path records "unknown"). + curl -fsSL --proto '=https' --tlsv1.2 --max-time 300 "$URL" -o "$TMP_FILE" 2>/dev/null \ || run_existing_or_die "download failed for $TARGET_DESC at $REPO (pinned versions do not fall back to latest). Install manually: https://github.com/$REPO/releases" - RESOLVED_VERSION=$(printf '%s' "$EFFECTIVE_URL" | sed -En 's#.*/download/v?([^/]+)/[^/]+$#\1#p') # 1. sha256 — mandatory fail-closed integrity gate. EXPECTED_SHA=$(curl -fsSL --proto '=https' --tlsv1.2 --max-time 30 "${URL}.sha256" 2>/dev/null \ @@ -104,15 +105,15 @@ if $NEED_DOWNLOAD; then # 2. Code signature — requirement-based authenticity gate (see header). 
codesign --verify --strict \ - -R '=anchor apple generic and certificate leaf[subject.OU] = "6W377FS7BS"' \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ "$TMP_FILE" 2>/dev/null \ - || run_existing_or_die "code-signature verification failed (not Developer ID Team 6W377FS7BS) — refusing to install" + || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" chmod +x "$TMP_FILE" mv "$TMP_FILE" "$BINARY" trap - EXIT - echo "${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}}" > "$VERSION_FILE" - echo "$BINARY_NAME: installed v${RESOLVED_VERSION:-${DESIRED_VERSION:-unknown}} (sha256 + Developer ID verified)" >&2 + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} From 07432d1b7bd9f2e8d560c1d2d24af17f28ee1718 Mon Sep 17 00:00:00 2001 From: che cheng Date: Thu, 2 Jul 2026 15:02:08 +0800 Subject: [PATCH 6/6] fix: exec-time signature re-verification + parse/install robustness (#112) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Verify R2 convergent finding (security M-2 demo + DA corroboration + Codex P1): binaries installed by the pre-hardening wrapper — including the formerly ad-hoc CheWordMCP — carry a matching version sidecar and were never re-checked, so the hardening never cleansed existing installs. Wrappers now re-verify the Developer ID requirement on EVERY spawn; a failing existing binary is deleted and re-downloaded once (adversarial E2E: planted ad-hoc binary + matching sidecar → detected, re-downloaded, verified). 
Also per Codex R2 P2s: version parse tolerates JSON spacing and fails closed when plugin.json exists but version is unparseable (no silent degrade to unpinned latest); chmod/mv failures no longer fake an install; plugin README wording now states exactly what the wrapper enforces vs what the release process provides (notarization). Refs #112 --- plugins/che-pdf-mcp/README.md | 2 +- .../che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh | 54 ++++++++++++++----- plugins/che-pptx-mcp/README.md | 2 +- .../che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh | 54 ++++++++++++++----- .../che-word-mcp/bin/che-word-mcp-wrapper.sh | 54 ++++++++++++++----- 5 files changed, 128 insertions(+), 38 deletions(-) diff --git a/plugins/che-pdf-mcp/README.md b/plugins/che-pdf-mcp/README.md index 98fe6ba..06d1513 100644 --- a/plugins/che-pdf-mcp/README.md +++ b/plugins/che-pdf-mcp/README.md @@ -9,7 +9,7 @@ claude plugin marketplace add PsychQuant/macdoc claude plugin install che-pdf-mcp@macdoc ``` -Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pdf-mcp/releases) 下載 signed + notarized 的 `ChePDFMCP` universal binary 到 `~/bin/`。 +Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pdf-mcp/releases) 下載 release 的 `ChePDFMCP` universal binary 到 `~/bin/`,安裝前與每次啟動時強制驗證 sha256(安裝時)與 Developer ID Application 簽章鏈(Team `6W377FS7BS`)。release 流程含 Apple notarization(wrapper 不重複檢查 notarization)。 ## 原始碼 diff --git a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh index e107084..cd48996 100755 --- a/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh +++ b/plugins/che-pdf-mcp/bin/che-pdf-mcp-wrapper.sh @@ -34,13 +34,24 @@ SCRIPT_ARGS=("$@") PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" +verify_binary() { + # Developer ID Application (marker OIDs) + Team OU pin. 
Runs on every + # candidate before exec — download-time AND exec-time (#112 verify R2: + # binaries installed by the pre-hardening wrapper — including ad-hoc + # ones — carry a matching sidecar and would otherwise never be re-checked). + codesign --verify --strict \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$1" 2>/dev/null +} + run_existing_or_die() { - # $1 = error message. Fail-to-known-good: prefer the already-installed - # binary over aborting the MCP server spawn entirely. + # $1 = error message. Fail-to-VERIFIED-good: prefer the already-installed + # binary over aborting the MCP server spawn — but only if it passes the + # same signature gate as a fresh download. echo "$BINARY_NAME: ERROR — $1" >&2 rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: keeping existing binary" >&2 + if [[ -x "$BINARY" ]] && verify_binary "$BINARY"; then + echo "$BINARY_NAME: keeping existing (signature-verified) binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} fi exit 1 @@ -49,8 +60,13 @@ run_existing_or_die() { # Read desired version from plugin.json (empty string on any failure → latest). DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then - DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ - | head -1 | cut -d'"' -f4 || true) + DESIRED_VERSION=$(grep -oE '"version"[[:space:]]*:[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ + | head -1 | sed -E 's/.*"([^"]+)"$/\1/' || true) + if [[ -z "$DESIRED_VERSION" ]]; then + # plugin.json exists but version unparseable — fail closed rather than + # silently degrading to the unpinned latest channel (#112 verify R2). 
+ run_existing_or_die "cannot parse version from plugin.json — refusing unpinned download" + fi fi # Read currently installed version from sidecar (empty string if missing). @@ -104,16 +120,30 @@ if $NEED_DOWNLOAD; then || run_existing_or_die "sha256 mismatch against release asset — refusing to install" # 2. Code signature — requirement-based authenticity gate (see header). - codesign --verify --strict \ - -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ - "$TMP_FILE" 2>/dev/null \ + verify_binary "$TMP_FILE" \ || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" - chmod +x "$TMP_FILE" - mv "$TMP_FILE" "$BINARY" + chmod +x "$TMP_FILE" || run_existing_or_die "chmod failed" + mv "$TMP_FILE" "$BINARY" || run_existing_or_die "install mv failed" trap - EXIT - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" \ + || echo "$BINARY_NAME: WARNING — version sidecar write failed (next spawn re-downloads)" >&2 echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi +# Exec-time re-verification: never exec an unverified binary, even one whose +# sidecar version matches (covers binaries installed by pre-hardening wrappers +# and post-install ~/bin tampering). Failure forces one re-download attempt. +if ! verify_binary "$BINARY"; then + if $NEED_DOWNLOAD; then + # We JUST downloaded + verified it; a failure here means tampering + # mid-flight — refuse outright. 
+ echo "$BINARY_NAME: ERROR — freshly installed binary failed re-verification" >&2 + exit 1 + fi + echo "$BINARY_NAME: existing binary failed signature verification — re-downloading" >&2 + rm -f "$BINARY" "$VERSION_FILE" + exec "${BASH_SOURCE[0]}" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} +fi + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-pptx-mcp/README.md b/plugins/che-pptx-mcp/README.md index 5f7dedd..555d7d2 100644 --- a/plugins/che-pptx-mcp/README.md +++ b/plugins/che-pptx-mcp/README.md @@ -9,7 +9,7 @@ claude plugin marketplace add PsychQuant/macdoc claude plugin install che-pptx-mcp@macdoc ``` -Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pptx-mcp/releases) 下載 signed + notarized 的 `ChePPTXMCP` universal binary 到 `~/bin/`。 +Wrapper 會自動從 [GitHub Releases](https://github.com/PsychQuant/che-pptx-mcp/releases) 下載 release 的 `ChePPTXMCP` universal binary 到 `~/bin/`,安裝前與每次啟動時強制驗證 sha256(安裝時)與 Developer ID Application 簽章鏈(Team `6W377FS7BS`)。release 流程含 Apple notarization(wrapper 不重複檢查 notarization)。 ## 原始碼 diff --git a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh index d4e5400..c860aad 100755 --- a/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh +++ b/plugins/che-pptx-mcp/bin/che-pptx-mcp-wrapper.sh @@ -34,13 +34,24 @@ SCRIPT_ARGS=("$@") PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" +verify_binary() { + # Developer ID Application (marker OIDs) + Team OU pin. Runs on every + # candidate before exec — download-time AND exec-time (#112 verify R2: + # binaries installed by the pre-hardening wrapper — including ad-hoc + # ones — carry a matching sidecar and would otherwise never be re-checked). 
+ codesign --verify --strict \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$1" 2>/dev/null +} + run_existing_or_die() { - # $1 = error message. Fail-to-known-good: prefer the already-installed - # binary over aborting the MCP server spawn entirely. + # $1 = error message. Fail-to-VERIFIED-good: prefer the already-installed + # binary over aborting the MCP server spawn — but only if it passes the + # same signature gate as a fresh download. echo "$BINARY_NAME: ERROR — $1" >&2 rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: keeping existing binary" >&2 + if [[ -x "$BINARY" ]] && verify_binary "$BINARY"; then + echo "$BINARY_NAME: keeping existing (signature-verified) binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} fi exit 1 @@ -49,8 +60,13 @@ run_existing_or_die() { # Read desired version from plugin.json (empty string on any failure → latest). DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then - DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ - | head -1 | cut -d'"' -f4 || true) + DESIRED_VERSION=$(grep -oE '"version"[[:space:]]*:[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ + | head -1 | sed -E 's/.*"([^"]+)"$/\1/' || true) + if [[ -z "$DESIRED_VERSION" ]]; then + # plugin.json exists but version unparseable — fail closed rather than + # silently degrading to the unpinned latest channel (#112 verify R2). + run_existing_or_die "cannot parse version from plugin.json — refusing unpinned download" + fi fi # Read currently installed version from sidecar (empty string if missing). @@ -104,16 +120,30 @@ if $NEED_DOWNLOAD; then || run_existing_or_die "sha256 mismatch against release asset — refusing to install" # 2. 
Code signature — requirement-based authenticity gate (see header). - codesign --verify --strict \ - -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ - "$TMP_FILE" 2>/dev/null \ + verify_binary "$TMP_FILE" \ || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" - chmod +x "$TMP_FILE" - mv "$TMP_FILE" "$BINARY" + chmod +x "$TMP_FILE" || run_existing_or_die "chmod failed" + mv "$TMP_FILE" "$BINARY" || run_existing_or_die "install mv failed" trap - EXIT - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" \ + || echo "$BINARY_NAME: WARNING — version sidecar write failed (next spawn re-downloads)" >&2 echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi +# Exec-time re-verification: never exec an unverified binary, even one whose +# sidecar version matches (covers binaries installed by pre-hardening wrappers +# and post-install ~/bin tampering). Failure forces one re-download attempt. +if ! verify_binary "$BINARY"; then + if $NEED_DOWNLOAD; then + # We JUST downloaded + verified it; a failure here means tampering + # mid-flight — refuse outright. 
+ echo "$BINARY_NAME: ERROR — freshly installed binary failed re-verification" >&2 + exit 1 + fi + echo "$BINARY_NAME: existing binary failed signature verification — re-downloading" >&2 + rm -f "$BINARY" "$VERSION_FILE" + exec "${BASH_SOURCE[0]}" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} +fi + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} diff --git a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh index 8b99543..93b12cf 100755 --- a/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh +++ b/plugins/che-word-mcp/bin/che-word-mcp-wrapper.sh @@ -34,13 +34,24 @@ SCRIPT_ARGS=("$@") PLUGIN_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" PLUGIN_JSON="$PLUGIN_ROOT/.claude-plugin/plugin.json" +verify_binary() { + # Developer ID Application (marker OIDs) + Team OU pin. Runs on every + # candidate before exec — download-time AND exec-time (#112 verify R2: + # binaries installed by the pre-hardening wrapper — including ad-hoc + # ones — carry a matching sidecar and would otherwise never be re-checked). + codesign --verify --strict \ + -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ + "$1" 2>/dev/null +} + run_existing_or_die() { - # $1 = error message. Fail-to-known-good: prefer the already-installed - # binary over aborting the MCP server spawn entirely. + # $1 = error message. Fail-to-VERIFIED-good: prefer the already-installed + # binary over aborting the MCP server spawn — but only if it passes the + # same signature gate as a fresh download. 
echo "$BINARY_NAME: ERROR — $1" >&2 rm -f "${TMP_FILE:-}" 2>/dev/null # trap EXIT does not fire across exec — clean up rejected download here - if [[ -x "$BINARY" ]]; then - echo "$BINARY_NAME: keeping existing binary" >&2 + if [[ -x "$BINARY" ]] && verify_binary "$BINARY"; then + echo "$BINARY_NAME: keeping existing (signature-verified) binary" >&2 exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} fi exit 1 @@ -49,8 +60,13 @@ run_existing_or_die() { # Read desired version from plugin.json (empty string on any failure → latest). DESIRED_VERSION="" if [[ -f "$PLUGIN_JSON" ]]; then - DESIRED_VERSION=$(grep -oE '"version":[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ - | head -1 | cut -d'"' -f4 || true) + DESIRED_VERSION=$(grep -oE '"version"[[:space:]]*:[[:space:]]*"[^"]+"' "$PLUGIN_JSON" 2>/dev/null \ + | head -1 | sed -E 's/.*"([^"]+)"$/\1/' || true) + if [[ -z "$DESIRED_VERSION" ]]; then + # plugin.json exists but version unparseable — fail closed rather than + # silently degrading to the unpinned latest channel (#112 verify R2). + run_existing_or_die "cannot parse version from plugin.json — refusing unpinned download" + fi fi # Read currently installed version from sidecar (empty string if missing). @@ -104,16 +120,30 @@ if $NEED_DOWNLOAD; then || run_existing_or_die "sha256 mismatch against release asset — refusing to install" # 2. Code signature — requirement-based authenticity gate (see header). 
- codesign --verify --strict \ - -R '=anchor apple generic and certificate 1[field.1.2.840.113635.100.6.2.6] exists and certificate leaf[field.1.2.840.113635.100.6.1.13] exists and certificate leaf[subject.OU] = "6W377FS7BS"' \ - "$TMP_FILE" 2>/dev/null \ + verify_binary "$TMP_FILE" \ || run_existing_or_die "code-signature verification failed (not a Developer ID Application cert of Team 6W377FS7BS) — refusing to install" - chmod +x "$TMP_FILE" - mv "$TMP_FILE" "$BINARY" + chmod +x "$TMP_FILE" || run_existing_or_die "chmod failed" + mv "$TMP_FILE" "$BINARY" || run_existing_or_die "install mv failed" trap - EXIT - echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" + echo "${DESIRED_VERSION:-unknown}" > "$VERSION_FILE" \ + || echo "$BINARY_NAME: WARNING — version sidecar write failed (next spawn re-downloads)" >&2 echo "$BINARY_NAME: installed v${DESIRED_VERSION:-unknown} (sha256 + Developer ID verified)" >&2 fi +# Exec-time re-verification: never exec an unverified binary, even one whose +# sidecar version matches (covers binaries installed by pre-hardening wrappers +# and post-install ~/bin tampering). Failure forces one re-download attempt. +if ! verify_binary "$BINARY"; then + if $NEED_DOWNLOAD; then + # We JUST downloaded + verified it; a failure here means tampering + # mid-flight — refuse outright. + echo "$BINARY_NAME: ERROR — freshly installed binary failed re-verification" >&2 + exit 1 + fi + echo "$BINARY_NAME: existing binary failed signature verification — re-downloading" >&2 + rm -f "$BINARY" "$VERSION_FILE" + exec "${BASH_SOURCE[0]}" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"} +fi + exec "$BINARY" ${SCRIPT_ARGS[@]+"${SCRIPT_ARGS[@]}"}