/
to_markdown.js
406 lines (374 loc) · 14.4 KB
/
to_markdown.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
// ::- Describes how a ProseMirror document is turned into
// Markdown/CommonMark text.
export class MarkdownSerializer {
  // :: (Object<(state: MarkdownSerializerState, node: Node, parent: Node, index: number)>, Object)
  // Build a serializer from two lookup tables.
  //
  // `nodes` maps node type names to functions. Each function
  // receives the serializer state, the node, its parent and the
  // node's index in that parent, and writes the node's Markdown
  // through the state.
  //
  // `marks` maps mark type names to info objects with `open` and
  // `close` properties. Each of those is either the literal string
  // to emit before/after the marked text, or a function called as
  //
  //     (state: MarkdownSerializerState, mark: Mark,
  //      parent: Fragment, index: number) → string
  //
  // where `parent` and `index` let the function inspect the
  // content around the mark.
  //
  // Further optional properties of a mark info object:
  //
  //  * `mixable`: when `true`, the opening and closing syntax of
  //    this mark may be reordered relative to other mixable marks.
  //    (For example, you can say `**a *b***` and `*a **b***`, but
  //    not `` `a *b*` ``.)
  //
  //  * `escape`: set to `false` to turn off character escaping
  //    inside the mark. Such a mark has to have the highest
  //    precedence (it must always be the innermost mark).
  //
  //  * `expelEnclosingWhitespace`: moves whitespace at the edges of
  //    the marked text to the outside of the mark syntax. Emphasis
  //    needs this because CommonMark does not allow enclosing
  //    whitespace inside emphasis marks, see:
  //    http://spec.commonmark.org/0.26/#example-330
  constructor(nodes, marks) {
    // `nodes` :: Object<(MarkdownSerializerState, Node)> holds the
    // node serializer functions, `marks` :: Object the mark
    // serializer info.
    Object.assign(this, {nodes, marks})
  }

  // :: (Node, ?Object) → string
  // Render the children of `content` as
  // [CommonMark](http://commonmark.org/) and return the resulting
  // text. `options` is handed on to the serializer state.
  serialize(content, options) {
    const serializerState = new MarkdownSerializerState(this.nodes, this.marks, options)
    serializerState.renderContent(content)
    const {out} = serializerState
    return out
  }
}
// :: MarkdownSerializer
// A serializer for the [basic schema](#schema).
export const defaultMarkdownSerializer = new MarkdownSerializer({
  // Prefix every line of the quoted content with "> ".
  blockquote(state, node) {
    state.wrapBlock("> ", null, node, () => state.renderContent(node))
  },
  // Emit a fenced code block. The content is written unescaped.
  code_block(state, node) {
    const content = node.textContent
    // The fence has to be longer than any run of three or more
    // backticks in the content; otherwise that run would be able to
    // terminate the block early.
    let fence = "```"
    const runs = /`{3,}/g
    for (let m = runs.exec(content); m; m = runs.exec(content))
      if (m[0].length >= fence.length) fence = m[0] + "`"
    state.write(fence + (node.attrs.params || "") + "\n")
    state.text(content, false)
    state.ensureNewLine()
    state.write(fence)
    state.closeBlock(node)
  },
  // ATX heading: `level` hash characters, a space, then the inline content.
  heading(state, node) {
    state.write(state.repeat("#", node.attrs.level) + " ")
    state.renderInline(node)
    state.closeBlock(node)
  },
  // Reuse the original markup when the node carries it, "---" otherwise.
  horizontal_rule(state, node) {
    state.write(node.attrs.markup || "---")
    state.closeBlock(node)
  },
  // Items are marked with the node's `bullet` attribute (default "*").
  bullet_list(state, node) {
    state.renderList(node, "  ", () => (node.attrs.bullet || "*") + " ")
  },
  // Numbers are right-aligned to the width of the largest one so
  // that the content of all items lines up.
  ordered_list(state, node) {
    let start = node.attrs.order || 1
    let maxW = String(start + node.childCount - 1).length
    let space = state.repeat(" ", maxW + 2)
    state.renderList(node, space, i => {
      let nStr = String(start + i)
      return state.repeat(" ", maxW - nStr.length) + nStr + ". "
    })
  },
  list_item(state, node) {
    state.renderContent(node)
  },
  paragraph(state, node) {
    state.renderInline(node)
    state.closeBlock(node)
  },
  // Inline image. Parentheses in the source URL are backslash-escaped
  // so they cannot end the link destination prematurely; double
  // quotes in the title are escaped for the same reason.
  image(state, node) {
    state.write("![" + state.esc(node.attrs.alt || "") + "](" +
                String(node.attrs.src).replace(/[\(\)]/g, "\\$&") +
                (node.attrs.title ? ' "' + node.attrs.title.replace(/"/g, '\\"') + '"' : "") + ")")
  },
  // A break is only written when some later sibling is not itself a
  // hard break, so breaks at the very end of the parent are dropped.
  hard_break(state, node, parent, index) {
    for (let i = index + 1; i < parent.childCount; i++) {
      if (parent.child(i).type != node.type) {
        state.write("\\\n")
        return
      }
    }
  },
  text(state, node) {
    state.text(node.text)
  }
}, {
  em: {open: "*", close: "*", mixable: true, expelEnclosingWhitespace: true},
  strong: {open: "**", close: "**", mixable: true, expelEnclosingWhitespace: true},
  link: {
    // Links whose text equals their target are written as autolinks
    // (`<url>`), everything else as `[text](href "title")`.
    open(_state, mark, parent, index) {
      return isPlainURL(mark, parent, index, 1) ? "<" : "["
    },
    close(state, mark, parent, index) {
      if (isPlainURL(mark, parent, index, -1)) return ">"
      // Parentheses and double quotes in the target are
      // backslash-escaped so they cannot be mistaken for the end of
      // the destination or the start of a title.
      let href = String(mark.attrs.href).replace(/[\(\)"]/g, "\\$&")
      let title = mark.attrs.title ? ' "' + mark.attrs.title.replace(/"/g, '\\"') + '"' : ""
      return "](" + href + title + ")"
    }
  },
  // Inline code is never escaped; the delimiter length is derived
  // from the backticks found in the neighbouring text node.
  code: {open(_state, _mark, parent, index) { return backticksFor(parent.child(index), -1) },
         close(_state, _mark, parent, index) { return backticksFor(parent.child(index - 1), 1) },
         escape: false}
})
// : (Node, number) → string
// Compute the delimiter for an inline code span around `node`. The
// delimiter is one backtick longer than the longest backtick run in
// the node's text. When the text contains backticks, a space is put
// between delimiter and content: after the delimiter for `side < 0`,
// before it for `side > 0`.
function backticksFor(node, side) {
  const tickRun = /`+/g
  let longest = 0
  if (node.isText) {
    for (let found = tickRun.exec(node.text); found; found = tickRun.exec(node.text))
      longest = Math.max(longest, found[0].length)
  }
  // No backticks in the content: a single backtick, no padding.
  if (longest == 0) return "`"
  const fence = "`".repeat(longest + 1)
  if (side > 0) return " " + fence
  if (side < 0) return fence + " "
  return fence
}
// : (Mark, Node, number, number) → bool
// Decide whether `link` can be written as an autolink (`<url>`).
// `side > 0` is used when opening the mark at `index`; `side < 0`
// when closing it, in which case `index` points just past the linked
// node. The link qualifies when it has no title, its target starts
// with a scheme, the linked node is a text node whose text equals
// the target and whose last mark is the link, and the adjacent node
// away from the delimiter (if any) does not carry the link as well.
function isPlainURL(link, parent, index, side) {
  const {href, title} = link.attrs
  if (title) return false
  if (!/^\w+:/.test(href)) return false

  const closing = side < 0
  const content = parent.child(closing ? index - 1 : index)
  if (!content.isText) return false
  if (content.text != href) return false
  if (content.marks[content.marks.length - 1] != link) return false

  // The linked node sits at the edge of the parent: nothing else can
  // share the link.
  const edge = closing ? 1 : parent.childCount - 1
  if (index == edge) return true

  const neighbor = parent.child(closing ? index - 2 : index + 1)
  return !link.isInSet(neighbor.marks)
}
// ::- This is an object used to track state and expose
// methods related to markdown serialization. Instances are passed to
// node and mark serialization methods (see `toMarkdown`).
export class MarkdownSerializerState {
  // :: (Object, Object, ?Object)
  // `nodes` and `marks` are the lookup tables of the serializer;
  // `options` is an optional configuration object.
  constructor(nodes, marks, options) {
    this.nodes = nodes
    this.marks = marks
    // `out` accumulates the output; `delim` is the prefix written at
    // the start of every line (for example "> " inside a blockquote).
    this.delim = this.out = ""
    // Either `false` or the node whose block was closed last and
    // whose trailing blank line has not been written yet.
    this.closed = false
    this.inTightList = false
    // :: Object
    // The options passed to the serializer.
    //   tightLists:: ?bool
    //   Whether to render lists in a tight style. This can be overridden
    //   on a node level by specifying a tight attribute on the node.
    //   Defaults to false.
    //
    // A shallow copy is stored so that filling in defaults never
    // modifies the object owned by the caller.
    this.options = Object.assign({}, options)
    if (typeof this.options.tightLists == "undefined")
      this.options.tightLists = false
  }

  // : (?number)
  // When a block is pending closure, terminate its last line and
  // add `size - 1` further lines that hold only the current
  // delimiter (with trailing whitespace removed). `size` defaults
  // to 2, which yields one separating blank line.
  flushClose(size) {
    if (this.closed) {
      if (!this.atBlank()) this.out += "\n"
      if (size == null) size = 2
      if (size > 1) {
        let delimMin = this.delim
        let trim = /\s+$/.exec(delimMin)
        if (trim) delimMin = delimMin.slice(0, delimMin.length - trim[0].length)
        for (let i = 1; i < size; i++)
          this.out += delimMin + "\n"
      }
      this.closed = false
    }
  }

  // :: (string, ?string, Node, ())
  // Render a block, prefixing each line with `delim`, and the first
  // line in `firstDelim`. `node` should be the node that is closed at
  // the end of the block, and `f` is a function that renders the
  // content of the block.
  wrapBlock(delim, firstDelim, node, f) {
    let old = this.delim
    this.write(firstDelim || delim)
    this.delim += delim
    f()
    this.delim = old
    this.closeBlock(node)
  }

  // : () → bool
  // True when the output is empty or ends in a newline.
  atBlank() {
    return /(^|\n)$/.test(this.out)
  }

  // :: ()
  // Ensure the current content ends with a newline.
  ensureNewLine() {
    if (!this.atBlank()) this.out += "\n"
  }

  // :: (?string)
  // Prepare the state for writing output (closing closed paragraphs,
  // adding delimiters, and so on), and then optionally add content
  // (unescaped) to the output.
  write(content) {
    this.flushClose()
    if (this.delim && this.atBlank())
      this.out += this.delim
    if (content) this.out += content
  }

  // :: (Node)
  // Close the block for the given node.
  closeBlock(node) {
    this.closed = node
  }

  // :: (string, ?bool)
  // Add the given text to the document. When escape is not `false`,
  // it will be escaped.
  text(text, escape) {
    let lines = text.split("\n")
    for (let i = 0; i < lines.length; i++) {
      // Has to be determined before `write`, which may itself add a
      // delimiter or flush a pending block close.
      let startOfLine = this.atBlank() || this.closed
      this.write()
      this.out += escape !== false ? this.esc(lines[i], startOfLine) : lines[i]
      if (i != lines.length - 1) this.out += "\n"
    }
  }

  // :: (Node)
  // Render the given node as a block. Throws when no serializer
  // function is registered for the node's type.
  render(node, parent, index) {
    // Guards against callers that pass the index in the place of the
    // parent node.
    if (typeof parent == "number")
      throw new Error("MarkdownSerializerState.render expects (node, parent, index), but got a number as parent")
    if (!this.nodes[node.type.name]) throw new Error("Token type `" + node.type.name + "` not supported by Markdown renderer")
    this.nodes[node.type.name](this, node, parent, index)
  }

  // :: (Node)
  // Render the contents of `parent` as block nodes.
  renderContent(parent) {
    parent.forEach((node, _, i) => this.render(node, parent, i))
  }

  // :: (Node)
  // Render the contents of `parent` as inline content.
  renderInline(parent) {
    // `active` is the stack of marks currently open in the output;
    // `trailing` holds whitespace expelled from the end of the
    // previous node that still has to be written.
    let active = [], trailing = ""
    let progress = (node, _, index) => {
      let marks = node ? node.marks : []

      // Remove marks from `hard_break` that are the last node inside
      // that mark to prevent parser edge cases with new lines just
      // before closing marks.
      // (FIXME it'd be nice if we had a schema-agnostic way to
      // identify nodes that serialize as hard breaks)
      if (node && node.type.name === "hard_break")
        marks = marks.filter(m => {
          if (index + 1 == parent.childCount) return false
          let next = parent.child(index + 1)
          return m.isInSet(next.marks) && (!next.isText || /\S/.test(next.text))
        })

      let leading = trailing
      trailing = ""
      // If whitespace has to be expelled from the node, adjust
      // leading and trailing accordingly.
      if (node && node.isText && marks.some(mark => {
        let info = this.marks[mark.type.name]
        return info && info.expelEnclosingWhitespace
      })) {
        let [_, lead, inner, trail] = /^(\s*)(.*?)(\s*)$/m.exec(node.text)
        leading += lead
        trailing = trail
        if (lead || trail) {
          node = inner ? node.withText(inner) : null
          // A whitespace-only node does not change the open marks.
          if (!node) marks = active
        }
      }

      // A non-escaping innermost mark (such as code) is left out of
      // the regular open/close bookkeeping and handled when the node
      // is rendered below.
      let inner = marks.length && marks[marks.length - 1], noEsc = inner && this.marks[inner.type.name].escape === false
      let len = marks.length - (noEsc ? 1 : 0)

      // Try to reorder 'mixable' marks, such as em and strong, which
      // in Markdown may be opened and closed in different order, so
      // that order of the marks for the token matches the order in
      // active.
      outer: for (let i = 0; i < len; i++) {
        let mark = marks[i]
        if (!this.marks[mark.type.name].mixable) break
        for (let j = 0; j < active.length; j++) {
          let other = active[j]
          if (!this.marks[other.type.name].mixable) break
          if (mark.eq(other)) {
            if (i > j)
              marks = marks.slice(0, j).concat(mark).concat(marks.slice(j, i)).concat(marks.slice(i + 1, len))
            else if (j > i)
              marks = marks.slice(0, i).concat(marks.slice(i + 1, j)).concat(mark).concat(marks.slice(j, len))
            continue outer
          }
        }
      }

      // Find the prefix of the mark set that didn't change
      let keep = 0
      while (keep < Math.min(active.length, len) && marks[keep].eq(active[keep])) ++keep

      // Close the marks that need to be closed
      while (keep < active.length)
        this.text(this.markString(active.pop(), false, parent, index), false)

      // Output any previously expelled trailing whitespace outside the marks
      if (leading) this.text(leading)

      // Open the marks that need to be opened
      if (node) {
        while (active.length < len) {
          let add = marks[active.length]
          active.push(add)
          this.text(this.markString(add, true, parent, index), false)
        }

        // Render the node. Special case code marks, since their content
        // may not be escaped.
        if (noEsc && node.isText)
          this.text(this.markString(inner, true, parent, index) + node.text +
                    this.markString(inner, false, parent, index + 1), false)
        else
          this.render(node, parent, index)
      }
    }
    parent.forEach(progress)
    // A final pass without a node closes the marks that are still
    // open and writes any pending trailing whitespace.
    progress(null, null, parent.childCount)
  }

  // :: (Node, string, (number) → string)
  // Render a node's content as a list. `delim` should be the extra
  // indentation added to all lines except the first in an item,
  // `firstDelim` is a function going from an item index to a
  // delimiter for the first line of the item.
  renderList(node, delim, firstDelim) {
    // Directly after a closed block of the same type a larger gap is
    // flushed; inside a tight list only the line is terminated.
    if (this.closed && this.closed.type == node.type)
      this.flushClose(3)
    else if (this.inTightList)
      this.flushClose(1)

    let isTight = typeof node.attrs.tight != "undefined" ? node.attrs.tight : this.options.tightLists
    let prevTight = this.inTightList
    this.inTightList = isTight
    node.forEach((child, _, i) => {
      if (i && isTight) this.flushClose(1)
      this.wrapBlock(delim, firstDelim(i), node, () => this.render(child, node, i))
    })
    this.inTightList = prevTight
  }

  // :: (string, ?bool) → string
  // Escape the given string so that it can safely appear in Markdown
  // content. If `startOfLine` is true, also escape characters that
  // have special meaning only at the start of the line.
  esc(str, startOfLine) {
    // An underscore with a word character on both sides is left
    // alone; every other match gets a backslash in front of it.
    str = str.replace(
      /[`*\\~\[\]_]/g,
      (m, i) => m == "_" && i > 0 && i + 1 < str.length && str[i-1].match(/\w/) && str[i+1].match(/\w/) ?  m : "\\" + m
    )
    if (startOfLine) str = str.replace(/^[:#\-*+>]/, "\\$&").replace(/^(\s*\d+)\./, "$1\\.")
    return str
  }

  // : (string) → string
  // Wrap `str` in double quotes, or in single quotes when it
  // contains a double quote, or in parentheses when it contains
  // both kinds of quote.
  quote(str) {
    let wrap = str.indexOf('"') == -1 ? '""' : str.indexOf("'") == -1 ? "''" : "()"
    return wrap[0] + str + wrap[1]
  }

  // :: (string, number) → string
  // Repeat the given string `n` times.
  repeat(str, n) {
    let out = ""
    for (let i = 0; i < n; i++) out += str
    return out
  }

  // : (Mark, bool, Node, number) → string
  // Get the markdown string for a given opening or closing mark.
  // When the configured `open`/`close` value is a function, it is
  // called with this state, the mark, `parent` and `index`.
  markString(mark, open, parent, index) {
    let info = this.marks[mark.type.name]
    let value = open ? info.open : info.close
    return typeof value == "string" ? value : value(this, mark, parent, index)
  }

  // :: (string) → { leading: ?string, trailing: ?string }
  // Get leading and trailing whitespace from a string. Values of
  // leading or trailing property of the return object will be undefined
  // if there is no match.
  getEnclosingWhitespace(text) {
    return {
      leading: (text.match(/^(\s+)/) || [])[0],
      trailing: (text.match(/(\s+)$/) || [])[0]
    }
  }
}