diff --git a/CHANGELOG.md b/CHANGELOG.md index 3964d62..cd4030d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `page get --as raw` emits the page body's untouched source — storage-format + XHTML, or server-rendered HTML with `--body-format view` — with no + markdown/text rendering. Use it to inspect macros, round-trip-edit a page or + debug. It requires `--scope full`. +- `page get` now reports a `render_notes` field when markdown/text rendering + drops or degrades content (macros without a native rendering, images shown + as placeholders). Rendering loss was previously silent; when `render_notes` + is present, re-read with `--as raw` for the full source. + ## [0.3.1] - 2026-05-19 ### Fixed diff --git a/docs/cli/index.html b/docs/cli/index.html index c20116c..94ffd43 100644 --- a/docs/cli/index.html +++ b/docs/cli/index.html @@ -635,12 +635,16 @@

confluence-cli page get

outline list the headings (start here when the structure is unknown) section one section, identified by --section <id> from the outline keyword blocks matching --keyword, with their heading for context - full the entire body (default)

+ full the entire body (default) + +Rendering to markdown/text drops content it cannot represent (macros, +images); when that happens the result carries a render_notes field. +Use --as raw to get the untouched source body instead.

Options

- + @@ -656,6 +660,9 @@

Examples

  # render the whole page as Markdown  # get the untouched storage XHTML (macros and all)
+  confluence-cli page get 123456 --as raw
+
   # a page URL works in place of an ID
   confluence-cli page get https://wiki.example.com/pages/viewpage.action?pageId=123456
diff --git a/docs/technical-design.md b/docs/technical-design.md index 5f023b5..c1869f0 100644 --- a/docs/technical-design.md +++ b/docs/technical-design.md @@ -168,6 +168,10 @@ CI 校验不漂移)—— 本节不再维护并行的命令清单,以杜绝 交互式向导(`config init`、`auth login`)的提示走 stderr;错误只走 stderr。 +读取侧:`page get` 的 `--as markdown|text` 渲染是有损的(不支持的宏被丢弃、图片 +降级为占位符)。损耗不再静默 —— 渲染丢内容时输出带 `render_notes` 字段;`--as raw` +则原样返回未经渲染的正文源(storage XHTML 或 view HTML),作为无损出口。 + ### 6.2 错误 错误以 JSON 写 **stderr**: diff --git a/internal/app/app_test.go b/internal/app/app_test.go index 148aee7..ed3296b 100644 --- a/internal/app/app_test.go +++ b/internal/app/app_test.go @@ -35,6 +35,12 @@ func mockConfluence(t *testing.T) *httptest.Server { "body":{"storage":{"value":"

Hi

body text

","representation":"storage"}}, "_links":{"webui":"/display/ENG/Welcome"}}`)) }) + mux.HandleFunc("/rest/api/content/790", func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte(`{"id":"790","type":"page","status":"current","title":"Macro Page", + "space":{"key":"ENG"},"version":{"number":1}, + "body":{"storage":{"value":"

before

","representation":"storage"}}, + "_links":{"webui":"/display/ENG/Macro"}}`)) + }) mux.HandleFunc("/rest/api/content/404", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusNotFound) w.Write([]byte(`{"message":"No content found"}`)) diff --git a/internal/app/page.go b/internal/app/page.go index ec9bd2b..46611d4 100644 --- a/internal/app/page.go +++ b/internal/app/page.go @@ -27,6 +27,9 @@ type pageOutput struct { Body string `json:"body,omitempty"` ScopeApplied string `json:"scope_applied,omitempty"` Truncated bool `json:"truncated,omitempty"` + // RenderNotes lists content the markdown/text renderer could not represent + // (e.g. unrendered macros). When non-empty, re-read with --as raw. + RenderNotes []string `json:"render_notes,omitempty"` } // dryRunOutput is the result shape emitted for a --dry-run write. @@ -154,12 +157,17 @@ func newPageGetCmd(s *appState) *cobra.Command { " outline list the headings (start here when the structure is unknown)\n" + " section one section, identified by --section from the outline\n" + " keyword blocks matching --keyword, with their heading for context\n" + - " full the entire body (default)", + " full the entire body (default)\n\n" + + "Rendering to markdown/text drops content it cannot represent (macros,\n" + + "images); when that happens the result carries a render_notes field.\n" + + "Use --as raw to get the untouched source body instead.", Example: " # render the whole page as Markdown\n" + " confluence-cli page get 123456\n\n" + " # list the headings, then read just one section\n" + " confluence-cli page get 123456 --scope outline\n" + " confluence-cli page get 123456 --scope section --section sec-2\n\n" + + " # get the untouched storage XHTML (macros and all)\n" + + " confluence-cli page get 123456 --as raw\n\n" + " # a page URL works in place of an ID\n" + " confluence-cli page get https://wiki.example.com/pages/viewpage.action?pageId=123456", Args: cobra.ExactArgs(1), @@ -187,17 +195,30 @@ func 
newPageGetCmd(s *appState) *cobra.Command { Version: page.Version, Ancestors: page.Ancestors, } if !noBody && page.Body != nil { - rendered, err := render.Render(page.Body.Value, render.Options{ - Scope: scope, Detail: detail, As: as, - SectionID: section, Keyword: keyword, - }) - if err != nil { - return err + if as == "raw" { + // raw emits the body exactly as fetched, with no rendering; + // slicing the unparsed source is not supported. + if scope != render.ScopeFull { + return cerrors.New(cerrors.CategoryUsage, "RAW_NEEDS_FULL_SCOPE", + "--as raw supports only --scope full"). + WithHint("Drop --scope, or drop --as raw to read a section.") + } + out.Body = page.Body.Value + out.ScopeApplied = "raw" + } else { + rendered, err := render.Render(page.Body.Value, render.Options{ + Scope: scope, Detail: detail, As: as, + SectionID: section, Keyword: keyword, + }) + if err != nil { + return err + } + out.Outline = rendered.Outline + out.Body = rendered.Body + out.ScopeApplied = rendered.ScopeApplied + out.Truncated = rendered.Truncated + out.RenderNotes = rendered.Notes } - out.Outline = rendered.Outline - out.Body = rendered.Body - out.ScopeApplied = rendered.ScopeApplied - out.Truncated = rendered.Truncated } return s.emit(out) }, @@ -208,12 +229,12 @@ func newPageGetCmd(s *appState) *cobra.Command { f.StringVar(&scope, "scope", "full", "read scope: full, outline, section or keyword") f.StringVar(&section, "section", "", "section ID (with --scope section)") f.StringVar(&keyword, "keyword", "", "keyword (with --scope keyword)") - f.StringVar(&as, "as", "markdown", "render body as markdown or text") + f.StringVar(&as, "as", "markdown", "output form: markdown, text or raw (unrendered source)") f.BoolVar(&noBody, "no-body", false, "fetch metadata only, skip the body") enumComplete(cmd, "body-format", "storage", "view") enumComplete(cmd, "detail", "simple", "with-ids", "full") enumComplete(cmd, "scope", "full", "outline", "section", "keyword") - enumComplete(cmd, "as", 
"markdown", "text") + enumComplete(cmd, "as", "markdown", "text", "raw") return cmd } diff --git a/internal/app/writes_test.go b/internal/app/writes_test.go index 130b30a..200c8b1 100644 --- a/internal/app/writes_test.go +++ b/internal/app/writes_test.go @@ -351,6 +351,51 @@ func TestCmdSearchListEnvelope(t *testing.T) { } } +func TestCmdPageGetRenderNotes(t *testing.T) { + srv := mockConfluence(t) + out, err := runCLI(t, srv, "page", "get", "790") + if err != nil { + t.Fatal(err) + } + var got map[string]any + json.Unmarshal([]byte(out), &got) + notes, _ := got["render_notes"].([]any) + if len(notes) == 0 { + t.Fatalf("page with a view-file macro should report render_notes:\n%s", out) + } + if first, _ := notes[0].(string); !strings.Contains(first, "view-file") { + t.Errorf("render_notes should name the macro: %v", notes) + } +} + +func TestCmdPageGetRaw(t *testing.T) { + srv := mockConfluence(t) + out, err := runCLI(t, srv, "page", "get", "790", "--as", "raw") + if err != nil { + t.Fatal(err) + } + var got map[string]any + json.Unmarshal([]byte(out), &got) + body, _ := got["body"].(string) + // raw emits the storage source untouched — the macro tag must survive. + if !strings.Contains(body, "", "").Replace(storage) root, err := html.Parse(strings.NewReader("" + src + "")) if err != nil { - return []Block{{Kind: KindPara, Text: strings.TrimSpace(stripTags(storage))}} + return []Block{{Kind: KindPara, Text: strings.TrimSpace(stripTags(storage))}}, nil } body := findBody(root) if body == nil { - return nil + return nil, nil } var blocks []Block walkBlocks(body, &blocks) - return blocks + return blocks, lossNotes(body) +} + +// lossNotes walks the parsed tree and reports content that markdown/text +// rendering drops or degrades: structured macros without a native rendering, +// and images (shown only as a placeholder). Each kind is reported once. 
+func lossNotes(root *html.Node) []string { + seen := map[string]bool{} + var notes []string + var walk func(*html.Node) + walk = func(n *html.Node) { + if n.Type == html.ElementNode { + switch strings.ToLower(n.Data) { + case "ac:structured-macro": + name := attrNS(n, "name") + // code/noformat macros render losslessly as code blocks. + if name != "" && name != "code" && name != "noformat" && !seen["macro:"+name] { + seen["macro:"+name] = true + notes = append(notes, "unrendered macro: "+name+" (use --as raw to see the source)") + } + case "ac:image": + if !seen["image"] { + seen["image"] = true + notes = append(notes, "an image is shown only as a placeholder (use --as raw to see the source)") + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + walk(c) + } + } + walk(root) + return notes } func findBody(n *html.Node) *html.Node { diff --git a/internal/render/render.go b/internal/render/render.go index 97e2204..fb1485f 100644 --- a/internal/render/render.go +++ b/internal/render/render.go @@ -64,16 +64,20 @@ type Rendered struct { Body string `json:"body"` ScopeApplied string `json:"scope_applied"` Truncated bool `json:"truncated"` + // Notes lists content the renderer could not represent (macros without a + // native rendering, images shown as placeholders). It is empty when the + // markdown/text output is a faithful representation of the source. + Notes []string `json:"notes,omitempty"` } // Render parses storage-format XHTML and renders it according to opt. 
func Render(storage string, opt Options) (Rendered, error) { opt = opt.withDefaults() - blocks := parse(storage) + blocks, notes := parse(storage) assignSections(blocks) outline := buildOutline(blocks) - result := Rendered{Outline: outline, ScopeApplied: opt.Scope} + result := Rendered{Outline: outline, ScopeApplied: opt.Scope, Notes: notes} switch opt.Scope { case ScopeFull: diff --git a/internal/render/render_test.go b/internal/render/render_test.go index fa0466a..7160d68 100644 --- a/internal/render/render_test.go +++ b/internal/render/render_test.go @@ -167,3 +167,34 @@ func TestRenderLink(t *testing.T) { t.Errorf("link not rendered:\n%s", got.Body) } } + +func TestRenderNotesReportsDroppedMacro(t *testing.T) { + t.Parallel() + storage := `

intro

` + + `` + + `` + + `

` + got, err := Render(storage, Options{Scope: ScopeFull}) + if err != nil { + t.Fatal(err) + } + joined := strings.Join(got.Notes, "\n") + if !strings.Contains(joined, "view-file") { + t.Errorf("notes should report the dropped view-file macro: %v", got.Notes) + } + if !strings.Contains(joined, "image") { + t.Errorf("notes should report the placeholdered image: %v", got.Notes) + } +} + +func TestRenderNotesEmptyForPlainPage(t *testing.T) { + t.Parallel() + // Headings, paragraphs, lists and code macros all render losslessly. + got, err := Render(sample, Options{Scope: ScopeFull}) + if err != nil { + t.Fatal(err) + } + if len(got.Notes) != 0 { + t.Errorf("a faithfully rendered page should carry no notes: %v", got.Notes) + } +} diff --git a/skills/confluence/references/reading-pages.md b/skills/confluence/references/reading-pages.md index 08ce150..31f6a09 100644 --- a/skills/confluence/references/reading-pages.md +++ b/skills/confluence/references/reading-pages.md @@ -56,16 +56,39 @@ Returns each block containing the term plus its nearest heading for context. ## Output syntax -`--as markdown` (default) renders headings, lists, code and tables as Markdown. -`--as text` produces plain text. `--no-body` fetches metadata only. -`--body-format storage|view` selects the source representation (default -`storage`). +`--as` controls the output form: + +| `--as` | output | +|--------|--------| +| `markdown` (default) | headings, lists, code, tables rendered as Markdown | +| `text` | plain text | +| `raw` | the body's **untouched source** — no rendering (requires `--scope full`) | + +`--no-body` fetches metadata only. `--body-format storage|view` selects the +source representation to fetch (default `storage`). + +## Rendering loss — macros and images + +`markdown` / `text` rendering cannot represent every Confluence construct: +macros without a native rendering (e.g. `view-file`) are dropped, and images +become a `[image]` placeholder. 
When that happens `page get` reports a +**`render_notes`** array naming what was lost. + +**If you see `render_notes`, the rendered `body` is incomplete.** Re-read the +page with `--as raw` to get the exact storage XHTML — macros and all — e.g. to +verify an embedded file or to round-trip-edit the page. + +```bash +confluence-cli page get 12345 # render_notes appears if content was dropped +confluence-cli page get 12345 --as raw # the full, unrendered storage source +``` ## Result shape `page get` returns: `id`, `title`, `space_key`, `status`, `url`, `version`, -`ancestors`, and — when a body was fetched — `outline`, `body`, `scope_applied` -and `truncated`. A `truncated: true` means the scope omitted part of the page. +`ancestors`, and — when a body was fetched — `outline`, `body`, `scope_applied`, +`truncated` and (when rendering dropped content) `render_notes`. A +`truncated: true` means the scope omitted part of the page. ## Browsing the page tree
Flag | Default | Description
--as | markdown | render body as markdown or text
--as | markdown | output form: markdown, text or raw (unrendered source)
--body-format | storage | source body format: storage or view
--detail | simple | block detail: simple, with-ids or full
--keyword |  | keyword (with --scope keyword)