Skip to content

Commit

Permalink
fix: add support for serializing node attributes to html
Browse files Browse the repository at this point in the history
  • Loading branch information
b-kelly committed Nov 9, 2021
1 parent a360b6d commit 0a00644
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 102 deletions.
52 changes: 38 additions & 14 deletions src/rich-text/markdown-serializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import { richTextSchema } from "../shared/schema";
import { Node as ProsemirrorNode, Mark } from "prosemirror-model";
import { error } from "../shared/logger";
import { ExternalEditorPlugin } from "../shared/external-editor-plugin";
import {
selfClosingElements,
supportedTagAttributes,
TagType,
} from "../shared/html-helpers";

// helper type so the code is a tad less messy
export type MarkdownSerializerNodes = {
Expand All @@ -23,7 +28,7 @@ export type MarkdownSerializerNodes = {
function renderHtmlTag(
state: MarkdownSerializerState,
node: ProsemirrorNode,
selfClosing = false
tagType: TagType
): boolean {
const markup = node.attrs.markup as string;
if (!markup) {
Expand All @@ -35,16 +40,32 @@ function renderHtmlTag(
return false;
}

// TODO attributes
// if the tag is self closing, just render the markup itself and return
if (selfClosing) {
state.text(markup.trim(), false);
return true;
const tag = markup.replace(/[<>/\s]/g, "");
const openingTagStart = `<${tag}`;

// start writing the opening tag
state.text(openingTagStart, false);

// write the attributes if necessary
if (supportedTagAttributes[tagType]) {
    // Render the attributes in alpha order, since we cannot know what order they were originally written in.
    // Sort a copy: `sort()` reorders in place and this list is a shared, exported constant.
    const attributes = [...supportedTagAttributes[tagType]].sort();
    for (const attr of attributes) {
        const value = node.attrs[attr] as string;
        // attributes with no (or an empty) value are left out of the tag entirely
        if (value) {
            // pass `false` like the other html writes in this function, so the raw
            // attribute text is emitted as-is instead of being markdown-escaped
            state.text(` ${attr}="${value}"`, false);
        }
    }
}

const tag = markup.replace(/[<>]/g, "");
// if the tag is self closing, just render the closing part of the original markup and return early
if (selfClosingElements.includes(tagType)) {
state.text(markup.replace(openingTagStart, ""), false);
return true;
}

state.text(`<${tag}>`, false);
// close the opening tag
state.text(">", false);
// TODO will this always be inline content?
state.renderInline(node);
// @ts-expect-error TODO when writing to a closed block, it injects newline chars...
Expand All @@ -60,7 +81,7 @@ function renderHtmlTag(
const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = {
...defaultMarkdownSerializer.nodes,
blockquote(state, node) {
if (renderHtmlTag(state, node)) {
if (renderHtmlTag(state, node, TagType.blockquote)) {
return;
}

Expand Down Expand Up @@ -95,7 +116,7 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = {
heading(state, node) {
const markup = (node.attrs.markup as string) || "";

if (renderHtmlTag(state, node)) {
if (renderHtmlTag(state, node, TagType.heading)) {
return;
} else if (markup && !markup.startsWith("#")) {
// "underlined" heading (Setext heading)
Expand Down Expand Up @@ -131,21 +152,24 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = {
});
},
list_item(state, node) {
if (renderHtmlTag(state, node)) {
if (renderHtmlTag(state, node, TagType.list_item)) {
return;
}
state.renderContent(node);
},
paragraph(state, node) {
if (renderHtmlTag(state, node)) {
if (renderHtmlTag(state, node, TagType.paragraph)) {
return;
}
state.renderInline(node);
state.closeBlock(node);
},

image(state, node) {
// TODO could be html
if (renderHtmlTag(state, node, TagType.image)) {
return;
}

state.write(
"![" +
state.esc(node.attrs.alt || "") +
Expand All @@ -156,7 +180,7 @@ const defaultMarkdownSerializerNodes: MarkdownSerializerNodes = {
);
},
hard_break(state, node, parent, index) {
if (renderHtmlTag(state, node, true)) {
if (renderHtmlTag(state, node, TagType.hardbreak)) {
return;
}

Expand Down
73 changes: 73 additions & 0 deletions src/shared/html-helpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* Describes the supported html tags.
*
* This is a numeric enum with no explicit initializers, so each member's value
* is its position in the declaration order (starting at 0). Inserting or
* reordering members therefore changes the numeric value of every member after it.
* @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags}
*/
export enum TagType {
// Uncategorized
unknown,
comment,

// Inline items
strike, //<del>, <s>, <strike>
strong, //<b>, <strong>
emphasis, //<i>, <em>
hardbreak, //<br>, <br/> (space agnostic)
code,
link, // <a> [href] [title]
image, // <img /> [src] [width] [height] [alt] [title]
keyboard,
pre,
sup,
sub,

// Block items
heading, // <h1>, <h2>, <h3>, <h4>, <h5>, <h6> (support full set of valid h tags)
paragraph,
horizontal_rule, // <hr>
blockquote, // <blockquote>
list_item,
ordered_list,
unordered_list,

// TODO not yet implemented (these need to be added to the prosemirror schema)
dd,
dl,
dt,
}

/**
 * Lookup from a tag type to the names of the html attributes supported on that tag.
 * A tag type with no entry here has no supported attributes.
 * @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags}
 */
export const supportedTagAttributes: Partial<Record<TagType, string[]>> = {
    [TagType.link]: ["href", "title"],
    [TagType.image]: ["alt", "height", "src", "title", "width"],
};

/**
 * The tag types that are counted as "block" level elements.
 * TODO change to a map for fast lookup?
 */
export const blockElements: TagType[] = [
    TagType.blockquote,
    TagType.heading,
    TagType.list_item,
    TagType.ordered_list,
    TagType.unordered_list,
    TagType.dd,
    TagType.dl,
    TagType.dt,
    TagType.paragraph,
    TagType.horizontal_rule,
    TagType.pre,
];

/**
 * The tag types whose elements are self-closing, i.e. written as a single tag
 * with no content and no separate closing tag (e.g. <br/>).
 * TODO change to a map for fast lookup?
 */
export const selfClosingElements: TagType[] = [
    TagType.hardbreak,
    TagType.image,
    TagType.horizontal_rule,
];
91 changes: 20 additions & 71 deletions src/shared/markdown-it/html.ts
Original file line number Diff line number Diff line change
@@ -1,61 +1,12 @@
import MarkdownIt from "markdown-it";
import State from "markdown-it/lib/rules_core/state_core";
import Token from "markdown-it/lib/token";

/**
* Describes the supported html tags.
*
* This is a numeric enum with no explicit initializers, so each member's value
* is its position in the declaration order (starting at 0). Inserting or
* reordering members therefore changes the numeric value of every member after it.
* @see {@link https://meta.stackexchange.com/questions/1777/what-html-tags-are-allowed-on-stack-exchange-sites|Supported tags}
*/
enum TagType {
// Uncategorized
unknown,
comment,

// Inline items
strike, //<del>, <s>, <strike>
strong, //<b>, <strong>
emphasis, //<i>, <em>
hardbreak, //<br>, <br/> (space agnostic)
code,
link, // <a> [href] [title]
image, // <img /> [src] [width] [height] [alt] [title]
keyboard,
pre,
sup,
sub,

// Block items
heading, // <h1>, <h2>, <h3>, <h4>, <h5>, <h6> (support full set of valid h tags)
paragraph,
horizontal_rule, // <hr>
blockquote, // <blockquote>
list_item,
ordered_list,
unordered_list,

// TODO not yet implemented (these need to be added to the prosemirror schema)
dd,
dl,
dt,
}

/**
 * The tag types that are counted as "block" level elements.
 * TODO change to a map for fast lookup?
 */
const blockElements: TagType[] = [
    TagType.blockquote,
    TagType.heading,
    TagType.list_item,
    TagType.ordered_list,
    TagType.unordered_list,
    TagType.dd,
    TagType.dl,
    TagType.dt,
    TagType.paragraph,
    TagType.horizontal_rule,
    TagType.pre,
];
import {
blockElements,
selfClosingElements,
supportedTagAttributes,
TagType,
} from "../html-helpers";

interface TagInfo {
type: TagType;
Expand Down Expand Up @@ -92,9 +43,6 @@ function getTagInfo(tag: string): TagInfo {
// strip away all html characters and potential attributes
const tagName = tag.replace(/[<>/]/g, "").trim().split(/\s/)[0];

let isSelfClosing = false;
const attributes: { [name: string]: string } = {};

if (["del", "strike", "s"].includes(tagName)) {
tagType = TagType.strike;
} else if (["b", "strong"].includes(tagName)) {
Expand All @@ -105,21 +53,12 @@ function getTagInfo(tag: string): TagInfo {
tagType = TagType.code;
} else if (tagName === "br") {
tagType = TagType.hardbreak;
isSelfClosing = true;
} else if (tagName === "blockquote") {
tagType = TagType.blockquote;
} else if (tagName === "a") {
tagType = TagType.link;
attributes["href"] = /href=["'](.+?)["']/.exec(tag)?.[1] || "";
attributes["title"] = /title=["'](.+?)["']/.exec(tag)?.[1] || "";
} else if (tagName === "img") {
tagType = TagType.image;
attributes["src"] = /src=["'](.+?)["']/.exec(tag)?.[1] || "";
attributes["width"] = /width=["'](.+?)["']/.exec(tag)?.[1] || "";
attributes["height"] = /height=["'](.+?)["']/.exec(tag)?.[1] || "";
attributes["alt"] = /alt=["'](.+?)["']/.exec(tag)?.[1] || "";
attributes["title"] = /title=["'](.+?)["']/.exec(tag)?.[1] || "";
isSelfClosing = true;
} else if (/h[1,2,3,4,5,6]/.test(tagName)) {
// NOTE: no need to set the level, the default `heading` generates this from the `tag` property
tagType = TagType.heading;
Expand All @@ -141,19 +80,28 @@ function getTagInfo(tag: string): TagInfo {
tagType = TagType.paragraph;
} else if (tagName === "hr") {
tagType = TagType.horizontal_rule;
isSelfClosing = true;
} else {
tagType = TagType.unknown;
}

let markup = tagName ? `<${isClosingTag ? "/" : ""}${tagName}>` : "";

const isSelfClosing = selfClosingElements.includes(tagType);
if (isSelfClosing) {
// sanitize the original markup for output
// <img title="asdfas" src="asdfasdf" /> becomes <img />
markup = tag.replace(/^(<[a-z]+).*?(\s?\/?>)$/i, "$1$2");
}

const attributes: { [name: string]: string } = {};
const supportedAttrs = supportedTagAttributes[tagType];
if (supportedAttrs?.length) {
for (const attr of supportedAttrs) {
attributes[attr] =
new RegExp(`${attr}=["'](.+?)["']`).exec(tag)?.[1] || "";
}
}

return {
type: tagType,
isSelfClosing: isSelfClosing,
Expand Down Expand Up @@ -247,9 +195,10 @@ type parsedBlockTokenInfo = {
function isParseableHtmlBlockToken(token: Token): parsedBlockTokenInfo {
const content = token.content;
// checks if a token matches `<open>content</close>` OR `<br />`
const matches = /^(?:(<[a-z0-9]+.*?>)([^<>]+?)(<\/[a-z0-9]+>))$|^(<[a-z0-9]+(?:\s.+?)?\s?\/?>)$/i.exec(
content
);
const matches =
/^(?:(<[a-z0-9]+.*?>)([^<>]+?)(<\/[a-z0-9]+>))$|^(<[a-z0-9]+(?:\s.+?)?\s?\/?>)$/i.exec(
content
);

if (!matches) {
return null;
Expand Down
36 changes: 19 additions & 17 deletions test/rich-text/markdown-serializer.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ describe("markdown-serializer", () => {
[`<p>html paragraph</p>`, `<p>html paragraph</p>`],
["```js\ntest\n```", "```js\ntest\n```"],
["~~~\ntest\n~~~", "~~~\ntest\n~~~"],
//[`<pre><code>test</code></pre>`, `<pre><code>test</code></pre>`],
[`<pre><code>test</code></pre>`, `<pre><code>test</code></pre>`],
[`# ATX heading`, `# ATX heading`],
//[`Setext heading\n===`, `Setext heading\n===`],
[`<h1>html heading</h1>`, `<h1>html heading</h1>`],
Expand Down Expand Up @@ -151,22 +151,24 @@ describe("markdown-serializer", () => {
`![alt1](${crazyTestUrl} "title1")`,
`![alt1](${crazyTestUrl} "title1")`,
],
// [
// `<img src="src1" alt="alt1" title="title1">`,
// `<img src="src1" alt="alt1" title="title1">`,
// ],
// [
// `<img src="src1" alt="alt1" title="title1"/>`,
// `<img src="src1" alt="alt1" title="title1"/>`,
// ],
// [
// `<img src="src1" alt="alt1" title="title1" />`,
// `<img src="src1" alt="alt1" title="title1" />`,
// ],
// [
// `<img src="src1" height="10" width="10" />`,
// `<img src="src1" height="10" width="10" />`,
// ],
[
`<img alt="alt1" src="src1" title="title1">`,
`<img alt="alt1" src="src1" title="title1">`,
],
[
`<img alt="alt1" src="src1" title="title1"/>`,
`<img alt="alt1" src="src1" title="title1"/>`,
],
[
`<img alt="alt1" src="src1" title="title1" />`,
`<img alt="alt1" src="src1" title="title1" />`,
],
[
`<img height="10" src="src1" width="10" />`,
`<img height="10" src="src1" width="10" />`,
],
// TODO attributes render in alpha order
//[`<img src="src1" width="10" height="10" />`, `<img src="src1" height="10" width="10" />`],
/* Soft/Hard breaks */
[`test\ntest`, `test\ntest`],
[`test\n\ntest`, `test\n\ntest`],
Expand Down

0 comments on commit 0a00644

Please sign in to comment.