Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/fix-blockquote-md.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
default: patch
---

Fixed blockquotes needing a double backslash to be escaped; a space is now required after the `>` to form a blockquote.
5 changes: 5 additions & 0 deletions .changeset/fix-latex-codeblock.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
default: patch
---

Fixed LaTeX inside code blocks being parsed as math.
155 changes: 155 additions & 0 deletions src/app/plugins/markdown/extensions/matrix-math.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,160 @@
import type { TokenizerExtension, RendererExtension } from 'marked';

/**
 * Placeholder character substituted for `$` inside code so the math extensions do not
 * match `$` / `$$` there. It is a private-use code point, deliberately chosen outside
 * U+E000–U+E002, which are the emoticon placeholders.
 */
export const MATH_CODE_DOLLAR_MASK = '\uE020';

/**
 * Looks for a closing fence on the current line only.
 *
 * Scans forward from `from` until a newline or the end of `md`, and returns the index of
 * the first run of `tick` characters that is at least `minLen` long, or -1 if the line
 * holds no such run. Shorter runs are skipped as a whole.
 */
function findSameLineFenceClose(md: string, from: number, tick: string, minLen: number): number {
  for (let pos = from; pos < md.length && md[pos] !== '\n'; ) {
    if (md[pos] !== tick) {
      pos += 1;
      continue;
    }

    // Measure the whole run of tick characters starting at `pos`.
    let stop = pos;
    while (stop < md.length && md[stop] === tick) stop += 1;

    if (stop - pos >= minLen) return pos;
    pos = stop;
  }
  return -1;
}

/**
 * Finds the line that closes a multi-line fenced block.
 *
 * Walks `md` line by line starting at `contentStart`. A closing line is up to three
 * spaces, a run of at least three `tick` characters that is also at least `minLen` long,
 * and nothing but whitespace afterwards.
 *
 * @returns `contentEnd` (index where the closing line starts) and `blockEnd` (index just
 * past the closing line, including its newline when there is one), or `null` when no
 * closing line exists.
 */
function findMultilineFenceEnd(
  md: string,
  contentStart: number,
  tick: string,
  minLen: number
): { blockEnd: number; contentEnd: number } | null {
  const closingFence = tick === '`' ? /^ {0,3}(`{3,})\s*$/ : /^ {0,3}(~{3,})\s*$/;

  for (let lineStart = contentStart; lineStart <= md.length; ) {
    const newlineAt = md.indexOf('\n', lineStart);
    const isLastLine = newlineAt === -1;
    const lineEnd = isLastLine ? md.length : newlineAt;

    const run = closingFence.exec(md.slice(lineStart, lineEnd))?.[1];
    if (run && run.length >= minLen && run[0] === tick) {
      return {
        blockEnd: isLastLine ? md.length : newlineAt + 1,
        contentEnd: lineStart,
      };
    }

    if (isLastLine) break;
    lineStart = newlineAt + 1;
  }
  return null;
}

/**
 * Masks `$` in the body of a multi-line fenced block whose content begins at
 * `contentStart`. An unclosed fence extends to the end of `md`.
 */
function maskMultilineFenceBody(
  md: string,
  fenceStart: number,
  contentStart: number,
  tick: string,
  openLen: number
): { text: string; end: number } {
  const close = findMultilineFenceEnd(md, contentStart, tick, openLen);
  const contentEnd = close ? close.contentEnd : md.length;
  const end = close ? close.blockEnd : md.length;
  const maskedInner = md.slice(contentStart, contentEnd).replace(/\$/g, MATH_CODE_DOLLAR_MASK);
  return {
    text: md.slice(fenceStart, contentStart) + maskedInner + md.slice(contentEnd, end),
    end,
  };
}

/**
 * Tries to read a fenced code block that opens at index `i`.
 *
 * Three shapes are recognised, in this order:
 * 1. fence run directly followed by a newline: a multi-line block;
 * 2. a closing run of the same character (at least as long) later on the opening line:
 *    a single-line block such as "```$$x$$```";
 * 3. fence run followed by an info string (or trailing whitespace) and a newline: a
 *    multi-line block. A backtick fence whose info string contains a backtick is
 *    rejected, so ordinary inline code at line start is left to the inline-code scanner.
 *
 * @param md Markdown text (newlines already normalised to `\n` by the caller).
 * @param i Index to test; must be at the start of a line.
 * @returns The block text with every `$` in its content replaced by
 * `MATH_CODE_DOLLAR_MASK`, plus the index just past the block, or `null` when no fence
 * starts at `i`.
 */
function tryConsumeFence(md: string, i: number): { text: string; end: number } | null {
  const atLineStart = i === 0 || md[i - 1] === '\n';
  if (!atLineStart) return null;

  const open = /^(\s{0,3})(`{3,}|~{3,})/.exec(md.slice(i));
  if (!open?.[2]) return null;

  const fenceStr = open[2];
  const tick = fenceStr.charAt(0);
  const openLen = fenceStr.length;
  const afterOpen = i + open[0].length;

  // Shape 1: bare opening fence, content starts on the next line.
  if (afterOpen < md.length && md[afterOpen] === '\n') {
    return maskMultilineFenceBody(md, i, afterOpen + 1, tick, openLen);
  }

  // Shape 2: opening and closing runs share one line.
  const closeIdx = findSameLineFenceClose(md, afterOpen, tick, openLen);
  if (closeIdx >= 0) {
    let closeRun = 0;
    while (closeIdx + closeRun < md.length && md[closeIdx + closeRun] === tick) closeRun++;

    const maskedInner = md.slice(afterOpen, closeIdx).replace(/\$/g, MATH_CODE_DOLLAR_MASK);
    return {
      text: md.slice(i, afterOpen) + maskedInner + md.slice(closeIdx, closeIdx + closeRun),
      end: closeIdx + closeRun,
    };
  }

  // Shape 3: the rest of the opening line is an info string (e.g. "~~~js").
  const lineEnd = md.indexOf('\n', afterOpen);
  if (lineEnd === -1) return null; // Nothing follows the opening line, so nothing to mask.
  if (tick === '`' && md.slice(afterOpen, lineEnd).includes('`')) return null;
  return maskMultilineFenceBody(md, i, lineEnd + 1, tick, openLen);
}

/**
 * Tries to read an inline code span that opens with the backtick run at index `i`.
 *
 * The span closes at the first later backtick run of exactly the same length; runs of any
 * other length are skipped as a whole. Returns the span with every `$` between the
 * delimiters replaced by `MATH_CODE_DOLLAR_MASK`, plus the index just past the closing
 * run, or `null` when `md[i]` is not a backtick or no matching run exists.
 */
function tryConsumeInlineCode(md: string, i: number): { text: string; end: number } | null {
  if (md[i] !== '`') return null;

  // Length of the backtick run that starts at `at`.
  const countTicks = (at: number): number => {
    let len = 0;
    while (at + len < md.length && md[at + len] === '`') len += 1;
    return len;
  };

  const openLen = countTicks(i);
  const bodyStart = i + openLen;

  let cursor = bodyStart;
  while (cursor < md.length) {
    if (md[cursor] !== '`') {
      cursor += 1;
      continue;
    }

    const closeLen = countTicks(cursor);
    if (closeLen !== openLen) {
      cursor += closeLen;
      continue;
    }

    const end = cursor + openLen;
    const maskedBody = md.slice(bodyStart, cursor).replace(/\$/g, MATH_CODE_DOLLAR_MASK);
    return {
      text: md.slice(i, bodyStart) + maskedBody + md.slice(cursor, end),
      end,
    };
  }
  return null;
}

/**
 * Swaps every `$` found inside fenced or inline code for a placeholder so the Matrix math
 * extensions leave code literals alone. `\r\n` line endings are normalised to `\n` first.
 * {@link unmaskMathCodeDollarPlaceholders} must be applied to the final HTML.
 */
export function maskDollarSignsInsideMarkdownCode(markdown: string): string {
  const text = markdown.replace(/\r\n/g, '\n');
  const pieces: string[] = [];

  let pos = 0;
  while (pos < text.length) {
    const startsLine = pos === 0 || text[pos - 1] === '\n';

    // A fence is only attempted at line start; inline code is the fallback.
    const consumed =
      (startsLine ? tryConsumeFence(text, pos) : null) ??
      (text[pos] === '`' ? tryConsumeInlineCode(text, pos) : null);

    if (consumed) {
      pieces.push(consumed.text);
      pos = consumed.end;
    } else {
      pieces.push(text.charAt(pos));
      pos += 1;
    }
  }

  return pieces.join('');
}

/** Restores a literal `$` wherever the code-dollar placeholder appears in `html`. */
export function unmaskMathCodeDollarPlaceholders(html: string): string {
  const segments = html.split(MATH_CODE_DOLLAR_MASK);
  return segments.join('$');
}

function escapeHtml(text: string): string {
return text
.replace(/&/g, '&amp;')
Expand Down
39 changes: 39 additions & 0 deletions src/app/plugins/markdown/markdownToHtml.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,27 @@ describe('markdownToHtml', () => {
expect(result).toContain('E = mc^2');
});

// Dollars inside a multi-line fenced block must come out as literal text, not math markup.
it('does not parse dollars inside fenced code as math', () => {
  // Parse once and assert on the same output, like the neighbouring tests do.
  const result = markdownToHtml('```\n$$test$$\n```');
  expect(result).not.toContain('data-mx-maths');
  expect(result).toContain('$$test$$');
});

// Same guarantee when the opening and closing fence runs share one line.
it('does not parse dollars inside single-line fenced code as math', () => {
  // Parse once and assert on the same output, like the neighbouring tests do.
  const result = markdownToHtml('```$$test$$```');
  expect(result).not.toContain('data-mx-maths');
  expect(result).toContain('$$test$$');
});

// Same guarantee for a single-backtick inline code span.
it('does not parse dollars inside inline code as math', () => {
  // Parse once and assert on the same output, like the neighbouring tests do.
  const result = markdownToHtml('`$$test$$`');
  expect(result).not.toContain('data-mx-maths');
  expect(result).toContain('$$test$$');
});

// An inline code span surrounded by ordinary prose must not be picked up as math either.
it('does not parse inline math when dollars are only inside backticks in a sentence', () => {
  const html = markdownToHtml('See `$$test$$` here.');
  expect(html).toContain('$$test$$');
  expect(html).not.toContain('data-mx-maths');
});

it('converts block math syntax', () => {
const result = markdownToHtml('$$\\frac{a}{b}$$');
expect(result).toContain('data-mx-maths');
Expand Down Expand Up @@ -70,6 +91,24 @@ describe('markdownToHtml', () => {
expect(result).toContain('not bold');
});

// A `>` glued directly to text (as in the `>:3` emoticon) stays literal.
it('does not treat >:3 as a block quote (requires space after >)', () => {
  const html = markdownToHtml('>:3');
  expect(html).toContain(':3');
  expect(html).not.toContain('<blockquote>');
});

// `>` followed by a space still opens a block quote.
it('treats > followed by space as block quote', () => {
  const html = markdownToHtml('> quoted');
  expect(html).toContain('quoted');
  expect(html).toContain('<blockquote>');
});

// One backslash in front of `>` is enough to keep it from becoming a block quote.
it('escapes block quote with a single backslash before >', () => {
  const html = markdownToHtml('\\>:3');
  expect(html).toContain(':3');
  expect(html).not.toContain('<blockquote>');
});

it('preserves img[data-mx-emoticon] tags with valid mxc URLs', () => {
const html =
'<img data-mx-emoticon src="mxc://example.org/emote" alt=":blobcat:" title=":blobcat:" height="32" />';
Expand Down
27 changes: 20 additions & 7 deletions src/app/plugins/markdown/markdownToHtml.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import { marked } from 'marked';
import DOMPurify from 'dompurify';
import { matrixSpoilerExtension } from './extensions/matrix-spoiler';
import { matrixMathExtension, matrixMathBlockExtension } from './extensions/matrix-math';
import {
matrixMathExtension,
matrixMathBlockExtension,
maskDollarSignsInsideMarkdownCode,
unmaskMathCodeDollarPlaceholders,
} from './extensions/matrix-math';
import { matrixSubscriptExtension } from './extensions/matrix-subscript';
import { matrixEmoticonExtension, preprocessEmoticon } from './extensions/matrix-emoticon';
import { unescapeMarkdownBlockSequences, unescapeMarkdownInlineSequences } from './utils';
import {
escapeLineStartBlockquoteWithoutFollowingSpace,
unescapeMarkdownInlineSequences,
} from './utils';

// Configure marked with Matrix extensions
const processor = marked.use({
Expand Down Expand Up @@ -50,13 +58,15 @@ export function markdownToHtml(markdown: string): string {
// (e.g., &lt; becomes < for link URLs)
const decoded = decodeHtmlEntities(markdown);

// First unescape any block-level escape sequences (e.g., \>, \#)
const unescapedBlocks = unescapeMarkdownBlockSequences(decoded, (text) => text);
// Only treat `> ` as block quote, escape bare `>` at line start (e.g. `>:3`)
const blockquotePrefixed = escapeLineStartBlockquoteWithoutFollowingSpace(decoded);

const preprocessed = preprocessEmoticon(unescapedBlocks);
const preprocessed = preprocessEmoticon(blockquotePrefixed);

const mathInput = maskDollarSignsInsideMarkdownCode(preprocessed);

// Parse markdown to HTML using marked with our Matrix extensions
const html = processor.parse(preprocessed) as string;
const html = processor.parse(mathInput) as string;

// Unescape inline sequences (e.g., \*, \_) after parsing
const unescapedInline = unescapeMarkdownInlineSequences(html);
Expand Down Expand Up @@ -136,5 +146,8 @@ export function markdownToHtml(markdown: string): string {

DOMPurify.removeHook('afterSanitizeAttributes');

return sanitized.replace(/<li>(<p><\/p>)?<\/li>/gi, '<li><br></li>');
return unmaskMathCodeDollarPlaceholders(sanitized).replace(
/<li>(<p><\/p>)?<\/li>/gi,
'<li><br></li>'
);
}
48 changes: 5 additions & 43 deletions src/app/plugins/markdown/utils.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
import { findAndReplace } from '$utils/findAndReplace';

// Regex patterns for block-level markdown escape sequences
// These match escaped markdown characters like \>, \#, \`, etc.
const ESC_BLOCK_SEQ = /^\\(\\*[#>[ `])/;
const UN_ESC_BLOCK_SEQ = /^\*[#>[ `]/;

// URL-aware pattern for inline sequences
const URL_NEG_LB = '(?<!(?:https?|ftp|mailto|magnet):\\/\\/\\S*)';
const INLINE_SEQUENCE_SET = '[*_~`|]';
Expand Down Expand Up @@ -56,42 +51,9 @@ export const escapeMarkdownInlineSequences = (text: string): string => {
};

/**
* Removes escape sequences from markdown block elements in the given plain-text.
* This function unescapes characters that are escaped with backslashes (e.g., `\>`, `\#`)
* in markdown syntax, returning the original plain-text with markdown characters in effect.
*
* @param {string} text - The input markdown plain-text containing escape characters (e.g., `\> block quote`).
* @param {function} processPart - It takes the plain-text as input and returns a modified version of it.
* @returns {string} The plain-text with markdown escape sequences removed and markdown formatting applied.
* CommonMark treats `>` at line start as a block quote marker even when not followed by
* space. We only start a block quote when `>` is followed by horizontal whitespace.
* Lines like `>:3` get a backslash so the `>` is literal.
*/
export const unescapeMarkdownBlockSequences = (
text: string,
processPart: (text: string) => string
): string => {
const match = text.match(ESC_BLOCK_SEQ);

if (!match) return processPart(text);

const [, g1] = match;
return text.replace(ESC_BLOCK_SEQ, g1 ?? '');
};

/**
* Escapes markdown block elements by adding backslashes before markdown characters
* (e.g., `\>`, `\#`) that are normally interpreted as markdown syntax.
*
* @param {string} text - The input markdown plain-text that may contain markdown elements (e.g., `> block quote`).
* @param {function} processPart - It takes the plain-text as input and returns a modified version of it.
* @returns {string} The plain-text with markdown escape sequences added, preventing markdown formatting.
*/
export const escapeMarkdownBlockSequences = (
text: string,
processPart: (text: string) => string
): string => {
const match = text.match(UN_ESC_BLOCK_SEQ);

if (!match) return processPart(text);

const [, g1] = match;
return text.replace(UN_ESC_BLOCK_SEQ, `\\${g1}`);
};
/**
 * Backslash-escapes a line-start `>` that is not followed by a space or tab, so lines such
 * as `>:3` render literally instead of opening a block quote.
 *
 * Lines inside fenced code blocks are left untouched: a backslash inserted there would be
 * rendered literally (e.g. `>>> print(1)` would become `\>>> print(1)`).
 *
 * @param markdown Markdown source text.
 * @returns The text with bare line-start `>` markers escaped outside fenced code.
 */
export const escapeLineStartBlockquoteWithoutFollowingSpace = (markdown: string): string => {
  const bareQuote = /^(\s*)>(?![ \t])/gm;
  const fenceRun = /^ {0,3}(`{3,}|~{3,})/;

  // State of the currently open fence; fenceLen === 0 means "not inside a fence".
  let fenceChar = '';
  let fenceLen = 0;

  const lines = markdown.split('\n').map((line) => {
    const run = fenceRun.exec(line);
    const marker = run?.[1] ?? '';
    const rest = run ? line.slice(run[0].length) : '';

    if (fenceLen > 0) {
      // A closing fence uses the same character, is at least as long as the opener, and
      // is followed by whitespace only.
      const closes =
        marker.length >= fenceLen && marker.startsWith(fenceChar) && rest.trim() === '';
      if (closes) fenceLen = 0;
      return line;
    }

    // A backtick fence may not have backticks in its info string; such a line is inline
    // code (e.g. "```x```"), not a fence opener.
    const opensFence = marker !== '' && !(marker.startsWith('`') && rest.includes('`'));
    if (opensFence) {
      fenceChar = marker.charAt(0);
      fenceLen = marker.length;
      return line;
    }

    return line.replace(bareQuote, '$1\\>');
  });

  return lines.join('\n');
};
Loading