Skip to content

Commit

Permalink
🎨 HTML 转换 Markdown 时转义文本中出现的标记符 siyuan-note/siyuan#6348
Browse files Browse the repository at this point in the history
  • Loading branch information
88250 committed Oct 25, 2022
1 parent ff625e2 commit 319a4de
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 17 deletions.
2 changes: 2 additions & 0 deletions h2m.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/88250/lute/editor"
"github.com/88250/lute/html"
"github.com/88250/lute/html/atom"
"github.com/88250/lute/lex"
"github.com/88250/lute/parse"
"github.com/88250/lute/render"
"github.com/88250/lute/util"
Expand Down Expand Up @@ -122,6 +123,7 @@ func (lute *Lute) genASTByDOM(n *html.Node, tree *parse.Tree) {
}
node.Tokens = bytes.ReplaceAll(node.Tokens, []byte{194, 160}, []byte{' '}) // 将   转换为空格
node.Tokens = bytes.ReplaceAll(node.Tokens, []byte("\n"), []byte{' '}) // 将 \n 转换为空格 https://github.com/siyuan-note/siyuan/issues/6052
node.Tokens = lex.EscapeMarkers(node.Tokens)
if nil != n.Parent && atom.Span == n.Parent.DataAtom && ("" != util.DomAttrValue(n.Parent, "class")) {
if lastc := tree.Context.Tip.LastChild; nil == lastc || (ast.NodeText == lastc.Type && !bytes.HasSuffix(lastc.Tokens, []byte("**"))) {
node.Tokens = []byte("**" + util.BytesToStr(node.Tokens) + "**")
Expand Down
6 changes: 3 additions & 3 deletions javascript/lute.min.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion javascript/lute.min.js.map

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions lex/token.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,34 @@ func BytesShowLength(bytes []byte) int {
}
return length
}

// EscapeMarkers returns tokens with a backslash inserted before every byte
// for which IsCommonInlineMarker reports true, so that text taken from HTML
// is not interpreted as inline Markdown syntax.
//
// The input slice is never written to. When tokens contains no marker bytes
// it is returned as-is (the result aliases the input); otherwise a newly
// allocated slice is returned.
func EscapeMarkers(tokens []byte) []byte {
	// First pass: count the markers so the result can be allocated exactly
	// once instead of re-growing the slice for every escaped byte.
	markers := 0
	for _, token := range tokens {
		if IsCommonInlineMarker(token) {
			markers++
		}
	}
	if 0 == markers {
		return tokens
	}

	// Second pass: copy into a fresh buffer. Building the output separately
	// keeps the caller's backing array intact even when it has spare capacity.
	escaped := make([]byte, 0, len(tokens)+markers)
	for _, token := range tokens {
		if IsCommonInlineMarker(token) {
			escaped = append(escaped, ItemBackslash)
		}
		escaped = append(escaped, token)
	}
	return escaped
}

// IsMarker reports whether token is one of the bytes treated as a potential
// Markdown marker.
func IsMarker(token byte) bool {
	switch token {
	case ItemAsterisk, ItemUnderscore, ItemBacktick, ItemTilde:
	case ItemDollar, ItemEqual, ItemCrosshatch, ItemAmpersand:
	case ItemBackslash, ItemNewline, ItemBang, ItemLess:
	case ItemOpenBracket, ItemCloseBracket, ItemOpenBrace, ItemOpenParen:
	default:
		// Anything not listed above is not a marker.
		return false
	}
	return true
}

// IsCommonInlineMarker reports whether token is one of the marker bytes that
// EscapeMarkers prefixes with a backslash: asterisk, underscore, backslash,
// backtick, tilde or dollar sign.
func IsCommonInlineMarker(token byte) bool {
	return ItemAsterisk == token ||
		ItemUnderscore == token ||
		ItemBackslash == token ||
		ItemBacktick == token ||
		ItemTilde == token ||
		ItemDollar == token
}
16 changes: 6 additions & 10 deletions parse/text.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,14 @@ func (t *Tree) parseText(ctx *InlineContext) *ast.Node {

// isMarker reports whether token is a potential Markdown marker.
//
// NOTE(review): this span is rendered from a diff without +/- markers, so the
// removed and the added lines of the function appear interleaved below and the
// text is not compilable as shown.
func (t *Tree) isMarker(token byte) bool {
switch token {
case lex.ItemAsterisk, lex.ItemUnderscore, lex.ItemOpenBracket, lex.ItemBang, lex.ItemNewline, lex.ItemBackslash, lex.ItemBacktick, lex.ItemLess,
lex.ItemCloseBracket, lex.ItemAmpersand, lex.ItemTilde, lex.ItemDollar, lex.ItemOpenBrace, lex.ItemOpenParen, lex.ItemEqual, lex.ItemCrosshatch:
if lex.IsMarker(token) {
return true
}

if t.Context.ParseOption.Sup && lex.ItemCaret == token {
return true
case lex.ItemCaret:
if t.Context.ParseOption.Sup {
return true
}
return false
default:
return false
}
return false
}

var backslash = util.StrToBytes("\\")
Expand Down
7 changes: 4 additions & 3 deletions test/h2m_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (

var html2MdTests = []parseTest{

{"74", "foo**bar**<strong>baz</strong>bazz", "foo\\*\\*bar\\*\\***baz**bazz\n"},
{"73", "<p>foo</p>\n<ul>\n<li>bar\n</ul>\n<p>baz</p>", "foo\n\n * bar\n\n baz\n"},
{"72", "<p>foo</p>\n\n<ul>\n<li>bar\n</ul>\n\n<p>baz</p>", "foo\n\n* bar\n\nbaz\n"},
{"71", "<strong><sup>[1]</sup></strong>1", "**^[1]^**1\n"},
Expand Down Expand Up @@ -81,7 +82,7 @@ var html2MdTests = []parseTest{
</tr>
</table>
</body>
</html>`, "| Month | Savings |\n| ---------- | --------- |\n| January | $100 |\n| February | $80 |\n"},
</html>`, "| Month | Savings |\n| ---------- | --------- |\n| January | \\$100 |\n| February | \\$80 |\n"},
{"27", `<html>
<body>
<table>
Expand All @@ -103,8 +104,8 @@ var html2MdTests = []parseTest{
</tbody>
</table>
</body>
</html>`, "| Month | Savings |\n| ---------- | --------- |\n| January | $100 |\n| February | $80 |\n"},
{"26", "<table class=\"markdown-reference\"><thead><tr><th>Type</th><th class=\"second-example\">Or</th><th>… to Get</th></tr></thead><tbody><tr><td class=\"preformatted\">*Italic*</td><td class=\"preformatted second-example\">_Italic_</td><td><em>Italic</em></td></tr><tr><td class=\"preformatted\">**Bold**</td><td class=\"preformatted second-example\">__Bold__</td><td><strong>Bold</strong></td></tr><tr><td class=\"preformatted\"># Heading 1</td><td class=\"preformatted second-example\">Heading 1<br>=========</td><td><h1 class=\"smaller-h1\">Heading 1</h1></td></tr><tr><td class=\"preformatted\">## Heading 2</td><td class=\"preformatted second-example\">Heading 2<br>---------</td><td><h2 class=\"smaller-h2\">Heading 2</h2></td></tr><tr><td class=\"preformatted\">[Link](http://a.com)</td><td class=\"preformatted second-example\">[Link][1]<br>⋮<br>[1]: http://b.org</td><td><a href=\"https://commonmark.org/\">Link</a></td></tr><tr><td class=\"preformatted\">![Image](http://url/a.png)</td><td class=\"preformatted second-example\">![Image][1]<br>⋮<br>[1]: http://url/b.jpg</td><td><img src=\"https://commonmark.org/help/images/favicon.png\" width=\"36\" height=\"36\" alt=\"Markdown\"></td></tr><tr><td class=\"preformatted\">&gt; Blockquote</td><td class=\"preformatted second-example\">&nbsp;</td><td><blockquote>Blockquote</blockquote></td></tr><tr><td class=\"preformatted\"><p>* List<br>* List<br>* List</p></td><td class=\"preformatted second-example\"><p>- List<br>- List<br>- List<br></p></td><td><ul><li>List</li><li>List</li><li>List</li></ul></td></tr></tbody></table>", "| Type | Or | … to Get |\n| ---------------------------- | -------------------------------------- | ----------------------------------------------------------- |\n| *Italic* | _Italic_ | *Italic* |\n| **Bold** | __Bold__ | **Bold** |\n| # Heading 1 | Heading 1<br/>========= | # Heading 1 |\n| ## Heading 2 | Heading 2<br/>--------- | ## Heading 2 |\n| [Link](http://a.com) | [Link][1]<br/>⋮<br/>[1]: 
http://b.org | [Link](https://commonmark.org/) |\n| ![Image](http://url/a.png) | ![Image][1]<br/>⋮<br/>[1]: http://url/b.jpg | ![Markdown](https://commonmark.org/help/images/favicon.png) |\n| > Blockquote | | > Blockquote |\n| * List<br/>* List<br/>* List | - List<br/>- List<br/>- List<br/> | * List* List* List |\n"},
</html>`, "| Month | Savings |\n| ---------- | --------- |\n| January | \\$100 |\n| February | \\$80 |\n"},
{"26", "<table class=\"markdown-reference\"><thead><tr><th>Type</th><th class=\"second-example\">Or</th><th>… to Get</th></tr></thead><tbody><tr><td class=\"preformatted\">*Italic*</td><td class=\"preformatted second-example\">_Italic_</td><td><em>Italic</em></td></tr><tr><td class=\"preformatted\">**Bold**</td><td class=\"preformatted second-example\">__Bold__</td><td><strong>Bold</strong></td></tr><tr><td class=\"preformatted\"># Heading 1</td><td class=\"preformatted second-example\">Heading 1<br>=========</td><td><h1 class=\"smaller-h1\">Heading 1</h1></td></tr><tr><td class=\"preformatted\">## Heading 2</td><td class=\"preformatted second-example\">Heading 2<br>---------</td><td><h2 class=\"smaller-h2\">Heading 2</h2></td></tr><tr><td class=\"preformatted\">[Link](http://a.com)</td><td class=\"preformatted second-example\">[Link][1]<br>⋮<br>[1]: http://b.org</td><td><a href=\"https://commonmark.org/\">Link</a></td></tr><tr><td class=\"preformatted\">![Image](http://url/a.png)</td><td class=\"preformatted second-example\">![Image][1]<br>⋮<br>[1]: http://url/b.jpg</td><td><img src=\"https://commonmark.org/help/images/favicon.png\" width=\"36\" height=\"36\" alt=\"Markdown\"></td></tr><tr><td class=\"preformatted\">&gt; Blockquote</td><td class=\"preformatted second-example\">&nbsp;</td><td><blockquote>Blockquote</blockquote></td></tr><tr><td class=\"preformatted\"><p>* List<br>* List<br>* List</p></td><td class=\"preformatted second-example\"><p>- List<br>- List<br>- List<br></p></td><td><ul><li>List</li><li>List</li><li>List</li></ul></td></tr></tbody></table>", "| Type | Or | … to Get |\n| ---------------------------- | -------------------------------------- | ----------------------------------------------------------- |\n| \\*Italic\\* | \\_Italic\\_ | *Italic* |\n| \\*\\*Bold\\*\\* | \\_\\_Bold\\_\\_ | **Bold** |\n| # Heading 1 | Heading 1<br/>========= | # Heading 1 |\n| ## Heading 2 | Heading 2<br/>--------- | ## Heading 2 |\n| [Link](http://a.com) | 
[Link][1]<br/>⋮<br/>[1]: http://b.org | [Link](https://commonmark.org/) |\n| ![Image](http://url/a.png) | ![Image][1]<br/>⋮<br/>[1]: http://url/b.jpg | ![Markdown](https://commonmark.org/help/images/favicon.png) |\n| > Blockquote | | > Blockquote |\n| \\* List<br/>\\* List<br/>\\* List | - List<br/>- List<br/>- List<br/> | * List* List* List |\n"},
{"25", "<table class=\"table table-bordered\"><thead class=\"thead-light\"><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#tables\">Table</a></td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr><tr><td><a href=\"https://www.markdownguide.org/extended-syntax/#fenced-code-blocks\">Fenced Code Block</a></td><td><code>```<br>{<br>&nbsp;&nbsp;\"firstName\": \"John\",<br>&nbsp;&nbsp;\"lastName\": \"Smith\",<br>&nbsp;&nbsp;\"age\": 25<br>}<br>```</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |\n| [Table](https://www.markdownguide.org/extended-syntax/#tables) | `\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|` |\n| [Fenced Code Block](https://www.markdownguide.org/extended-syntax/#fenced-code-blocks) | ````{\u00a0\u00a0\"firstName\": \"John\",\u00a0\u00a0\"lastName\": \"Smith\",\u00a0\u00a0\"age\": 25}```` |\n"},
{"24", "<table><thead><tr><th>Element</th><th>Markdown Syntax</th></tr></thead><tbody><tr><td>Table</td><td><code>| Syntax | Description |<br>| ----------- | ----------- |<br>| Header | Title |<br>| Paragraph | Text |</code></td></tr></tbody></table>", "| Element | Markdown Syntax |\n| --------- | ------------------------------------------------------------------------------------------------------------------ |\n| Table | `\\| Syntax \\| Description \\|\\| ----------- \\| ----------- \\|\\| Header \\| Title \\|\\| Paragraph \\| Text \\|` |\n"},
{"23", "<h2 style=\"box-sizing: border-box; margin-top: 24px; margin-bottom: 16px; font-weight: 600; font-size: 1.5em; line-height: 1.25; padding-bottom: 0.3em; border-bottom: 1px solid rgb(234, 236, 239); color: rgb(36, 41, 46); font-family: -apple-system, BlinkMacSystemFont, &quot;Segoe UI&quot;, Helvetica, Arial, sans-serif, &quot;Apple Color Emoji&quot;, &quot;Segoe UI Emoji&quot;; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: normal; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; background-color: rgb(255, 255, 255); text-decoration-style: initial; text-decoration-color: initial;\"><g-emoji class=\"g-emoji\" alias=\"m\" fallback-src=\"https://github.githubassets.com/images/icons/emoji/unicode/24c2.png\" style=\"box-sizing: border-box; font-family: &quot;Apple Color Emoji&quot;, &quot;Segoe UI&quot;, &quot;Segoe UI Emoji&quot;, &quot;Segoe UI Symbol&quot;; font-size: 1.2em; font-weight: 400; line-height: 20px; vertical-align: middle; font-style: normal !important;\">Ⓜ️</g-emoji><span> </span>Markdown User Guide</h2>", "## Ⓜ️ Markdown User Guide\n"},
Expand Down

0 comments on commit 319a4de

Please sign in to comment.