diff --git a/src/rich-text/markdown-serializer.ts b/src/rich-text/markdown-serializer.ts index a7734c3b..fce36b72 100644 --- a/src/rich-text/markdown-serializer.ts +++ b/src/rich-text/markdown-serializer.ts @@ -8,6 +8,11 @@ import { richTextSchema } from "../shared/schema"; import { Node as ProsemirrorNode, Mark } from "prosemirror-model"; import { error } from "../shared/logger"; import { ExternalEditorPlugin } from "../shared/external-editor-plugin"; +import { + selfClosingElements, + supportedTagAttributes, + TagType, +} from "../shared/html-helpers"; // helper type so the code is a tad less messy export type MarkdownSerializerNodes = { @@ -23,7 +28,7 @@ export type MarkdownSerializerNodes = { function renderHtmlTag( state: MarkdownSerializerState, node: ProsemirrorNode, - selfClosing = false + tagType: TagType ): boolean { const markup = node.attrs.markup as string; if (!markup) { @@ -35,16 +40,32 @@ function renderHtmlTag( return false; } - // TODO attributes - // if the tag is self closing, just render the markup itself and return - if (selfClosing) { - state.text(markup.trim(), false); - return true; + const tag = markup.replace(/[<>/\s]/g, ""); + const openingTagStart = `<${tag}`; + + // start writing the opening tag + state.text(openingTagStart, false); + + // write the attributes if necessary + if (supportedTagAttributes[tagType]) { + // render the attributes in alpha order, since we cannot know what order they were originally written in + const attributes = supportedTagAttributes[tagType].sort(); + for (const attr of attributes) { + const value = node.attrs[attr] as string; + if (value) { + state.text(` ${attr}="${value}"`); + } + } } - const tag = markup.replace(/[<>]/g, ""); + // if the tag is self closing, just render the closing part of the original markup and return early + if (selfClosingElements.includes(tagType)) { + state.text(markup.replace(openingTagStart, ""), false); + return true; + } - state.text(`<${tag}>`, false); + // close the opening tag + state.text(">", false); // TODO will this always be inline content? state.renderInline(node); // @ts-expect-error TODO when writing to a closed block, it injects newline chars... @@ -60,7 +81,7 @@ function renderHtmlTag( const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = { ...defaultMarkdownSerializer.nodes, blockquote(state, node) { - if (renderHtmlTag(state, node)) { + if (renderHtmlTag(state, node, TagType.blockquote)) { return; } @@ -95,7 +116,7 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = { heading(state, node) { const markup = (node.attrs.markup as string) || ""; - if (renderHtmlTag(state, node)) { + if (renderHtmlTag(state, node, TagType.heading)) { return; } else if (markup && !markup.startsWith("#")) { // "underlined" heading (Setext heading) @@ -131,13 +152,13 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = { }); }, list_item(state, node) { - if (renderHtmlTag(state, node)) { + if (renderHtmlTag(state, node, TagType.list_item)) { return; } state.renderContent(node); }, paragraph(state, node) { - if (renderHtmlTag(state, node)) { + if (renderHtmlTag(state, node, TagType.paragraph)) { return; } state.renderInline(node); @@ -145,7 +166,10 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = { }, image(state, node) { - // TODO could be html + if (renderHtmlTag(state, node, TagType.image)) { + return; + } + state.write( "![" + state.esc(node.attrs.alt || "") + @@ -156,7 +180,7 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = { ); }, hard_break(state, node, parent, index) { - if (renderHtmlTag(state, node, true)) { + if (renderHtmlTag(state, node, TagType.hardbreak)) { return; } diff --git a/src/shared/html-helpers.ts b/src/shared/html-helpers.ts new file mode 100644 index 00000000..6db4e5ce --- /dev/null +++ b/src/shared/html-helpers.ts @@ -0,0 +1,73 @@ +/** + * Describes the supported html tags + * @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags} + */ +export enum TagType { + // Uncategorized + unknown, + comment, + + // Inline items + strike, //, , + strong, //, + emphasis, //, + hardbreak, //
,
(space agnostic) + code, + link, // [href] [title] + image, // [src] [width] [height] [alt] [title] + keyboard, + pre, + sup, + sub, + + // Block items + heading, //

,

,

,

,

,
(support full set of valid h tags) + paragraph, + horizontal_rule, + blockquote, + list_item, + ordered_list, + unordered_list, + + //TODO not yet implemented (needs added to schema in prosemirror) + dd, + dl, + dt, +} + +/** + * Describes the supported attributes for each html tag + * @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags} + */ +export const supportedTagAttributes: { [key in TagType]?: string[] } = { + [TagType.link]: ["href", "title"], + [TagType.image]: ["alt", "height", "src", "title", "width"], +}; + +/** + * Collection of elements that are counted as "block" level elements + * TODO change to a map for fast lookup? + */ +export const blockElements = [ + TagType.blockquote, + TagType.heading, + TagType.list_item, + TagType.ordered_list, + TagType.unordered_list, + TagType.dd, + TagType.dl, + TagType.dt, + TagType.paragraph, + TagType.horizontal_rule, + TagType.pre, +]; + +/** + * Collection of elements that are self-closing (e.g.
) + * TODO change to a map for fast lookup? + */ +export const selfClosingElements = [ + TagType.hardbreak, + TagType.image, + TagType.horizontal_rule, +]; diff --git a/src/shared/markdown-it/html.ts b/src/shared/markdown-it/html.ts index 9e584634..de04dd90 100644 --- a/src/shared/markdown-it/html.ts +++ b/src/shared/markdown-it/html.ts @@ -1,61 +1,12 @@ import MarkdownIt from "markdown-it"; import State from "markdown-it/lib/rules_core/state_core"; import Token from "markdown-it/lib/token"; - -/** - * Describes the supported html tags - * @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags} - */ -enum TagType { - // Uncategorized - unknown, - comment, - - // Inline items - strike, //, , - strong, //, - emphasis, //, - hardbreak, //
,
(space agnostic) - code, - link, //
[href] [title] - image, // [src] [width] [height] [alt] [title] - keyboard, - pre, - sup, - sub, - - // Block items - heading, //

,

,

,

,

,
(support full set of valid h tags) - paragraph, - horizontal_rule, - blockquote, - list_item, - ordered_list, - unordered_list, - - //TODO not yet implemented (needs added to schema in prosemirror) - dd, - dl, - dt, -} - -/** - * Collection of elements that are counted as "block" level elements - * TODO change to a map for fast lookup? - */ -const blockElements = [ - TagType.blockquote, - TagType.heading, - TagType.list_item, - TagType.ordered_list, - TagType.unordered_list, - TagType.dd, - TagType.dl, - TagType.dt, - TagType.paragraph, - TagType.horizontal_rule, - TagType.pre, -]; +import { + blockElements, + selfClosingElements, + supportedTagAttributes, + TagType, +} from "../html-helpers"; interface TagInfo { type: TagType; @@ -92,9 +43,6 @@ function getTagInfo(tag: string): TagInfo { // strip away all html characters and potential attibutes const tagName = tag.replace(/[<>/]/g, "").trim().split(/\s/)[0]; - let isSelfClosing = false; - const attributes: { [name: string]: string } = {}; - if (["del", "strike", "s"].includes(tagName)) { tagType = TagType.strike; } else if (["b", "strong"].includes(tagName)) { @@ -105,21 +53,12 @@ function getTagInfo(tag: string): TagInfo { tagType = TagType.code; } else if (tagName === "br") { tagType = TagType.hardbreak; - isSelfClosing = true; } else if (tagName === "blockquote") { tagType = TagType.blockquote; } else if (tagName === "a") { tagType = TagType.link; - attributes["href"] = /href=["'](.+?)["']/.exec(tag)?.[1] || ""; - attributes["title"] = /title=["'](.+?)["']/.exec(tag)?.[1] || ""; } else if (tagName === "img") { tagType = TagType.image; - attributes["src"] = /src=["'](.+?)["']/.exec(tag)?.[1] || ""; - attributes["width"] = /width=["'](.+?)["']/.exec(tag)?.[1] || ""; - attributes["height"] = /height=["'](.+?)["']/.exec(tag)?.[1] || ""; - attributes["alt"] = /alt=["'](.+?)["']/.exec(tag)?.[1] || ""; - attributes["title"] = /title=["'](.+?)["']/.exec(tag)?.[1] || ""; - isSelfClosing = true; } else if (/h[1,2,3,4,5,6]/.test(tagName)) { // NOTE: no need to set the level, the default `heading` generates this from the `tag` property tagType = TagType.heading; @@ -141,19 +80,28 @@ function getTagInfo(tag: string): TagInfo { tagType = TagType.paragraph; } else if (tagName === "hr") { tagType = TagType.horizontal_rule; - isSelfClosing = true; } else { tagType = TagType.unknown; } let markup = tagName ? `<${isClosingTag ? "/" : ""}${tagName}>` : ""; + const isSelfClosing = selfClosingElements.includes(tagType); if (isSelfClosing) { // sanitize the original markup for output // becomes markup = tag.replace(/^(<[a-z]+).*?(\s?\/?>)$/i, "$1$2"); } + const attributes: { [name: string]: string } = {}; + const supportedAttrs = supportedTagAttributes[tagType]; + if (supportedAttrs?.length) { + for (const attr of supportedAttrs) { + attributes[attr] = + new RegExp(`${attr}=["'](.+?)["']`).exec(tag)?.[1] || ""; + } + } + return { type: tagType, isSelfClosing: isSelfClosing, @@ -247,9 +195,10 @@ type parsedBlockTokenInfo = { function isParseableHtmlBlockToken(token: Token): parsedBlockTokenInfo { const content = token.content; // checks if a token matches `content` OR `
` - const matches = /^(?:(<[a-z0-9]+.*?>)([^<>]+?)(<\/[a-z0-9]+>))$|^(<[a-z0-9]+(?:\s.+?)?\s?\/?>)$/i.exec( - content - ); + const matches = + /^(?:(<[a-z0-9]+.*?>)([^<>]+?)(<\/[a-z0-9]+>))$|^(<[a-z0-9]+(?:\s.+?)?\s?\/?>)$/i.exec( + content + ); if (!matches) { return null; diff --git a/test/rich-text/markdown-serializer.test.ts b/test/rich-text/markdown-serializer.test.ts index 8618d7bb..5c042261 100644 --- a/test/rich-text/markdown-serializer.test.ts +++ b/test/rich-text/markdown-serializer.test.ts @@ -102,7 +102,7 @@ describe("markdown-serializer", () => { [`

html paragraph

`, `

html paragraph

`], ["```js\ntest\n```", "```js\ntest\n```"], ["~~~\ntest\n~~~", "~~~\ntest\n~~~"], - //[`
test
`, `
test
`], + [`
test
`, `
test
`], [`# ATX heading`, `# ATX heading`], //[`Setext heading\n===`, `Setext heading\n===`], [`

html heading

`, `

html heading

`], @@ -151,22 +151,24 @@ describe("markdown-serializer", () => { `![alt1](${crazyTestUrl} "title1")`, `![alt1](${crazyTestUrl} "title1")`, ], - // [ - // `alt1`, - // `alt1`, - // ], - // [ - // `alt1`, - // `alt1`, - // ], - // [ - // `alt1`, - // `alt1`, - // ], - // [ - // ``, - // ``, - // ], + [ + `alt1`, + `alt1`, + ], + [ + `alt1`, + `alt1`, + ], + [ + `alt1`, + `alt1`, + ], + [ + ``, + ``, + ], + // TODO attributes render in alpha order + //[``, ``], /* Soft/Hard breaks */ [`test\ntest`, `test\ntest`], [`test\n\ntest`, `test\n\ntest`],