diff --git a/README.md b/README.md index 172d271043..279a25c089 100644 --- a/README.md +++ b/README.md @@ -75,8 +75,10 @@ You can provide an object of options as the last argument to `katex.render` and - `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color that unsupported commands and invalid LaTeX are rendered in when `throwOnError` is set to `false`. (default: `#cc0000`) - `macros`: `object`. A collection of custom macros. Each macro is a property with a name like `\name` (written `"\\name"` in JavaScript) which maps to a string that describes the expansion of the macro. Single-character keys can also be included in which case the character will be redefined as the given macro (similar to TeX active characters). - `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`). In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`). -- `unicodeTextInMathMode`: `boolean`. If `true`, supported unicode text characters like `é` and `試` will also work in math mode. (They always work in text mode.) The default is `false`, which matches XeTeX behavior; `true` emulates MathJax behavior. - `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior). +- `strict`: `boolean` or `string` or `function` (default: `"warn"`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. 
If `"warn"` (the default), warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior. A list of such features and their `errorCode`s: + - `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode. + - `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode. For example: @@ -129,9 +131,6 @@ will appear larger than 1cm in browser units. - MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's `colorIsTextColor` option to `true` for this behavior. KaTeX's default behavior matches MathJax with its `color.js` extension enabled. -- MathJax supports Unicode text characters in math mode, unlike LaTeX. - To support this behavior in KaTeX, set the `unicodeTextInMathMode` option - to `true`. - KaTeX breaks lines with `\\` and `\newline` in inline math, but ignores them in display math (matching LaTeX's behavior, but not MathJax's behavior). To allow `\\` and `\newline` to break lines in display mode, diff --git a/package.json b/package.json index 4382b25c5e..382b8c26f7 100644 --- a/package.json +++ b/package.json @@ -87,6 +87,7 @@ "nomnom": "^1.8.1" }, "jest": { + "setupTestFrameworkScriptFile": "<rootDir>/test/setup.js", "snapshotSerializers": [ "jest-serializer-html" ], diff --git a/src/Parser.js b/src/Parser.js index 66b95ddd6a..b9a96aeb91 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -941,12 +941,15 @@ export default class Parser { }, "text"), nucleus); } // At this point, we should have a symbol, possibly with accents. - // First expand any accented base symbol according to unicodeSymbols, - // unless we're in math mode and unicodeTextInMathMode is false - // (XeTeX-compatible mode). + // First expand any accented base symbol according to unicodeSymbols. 
if (unicodeSymbols.hasOwnProperty(text[0]) && - !symbols[this.mode][text[0]] && - (this.settings.unicodeTextInMathMode || this.mode === "text")) { + !symbols[this.mode][text[0]]) { + // This behavior is not strict (XeTeX-compatible) in math mode. + if (this.settings.strict && this.mode === "math") { + this.settings.nonstrict("unicodeTextInMathMode", + `Accented Unicode text character "${text[0]}" used in ` + + `math mode`, nucleus); + } text = unicodeSymbols[text[0]] + text.substr(1); } // Strip off any combining characters @@ -962,15 +965,20 @@ export default class Parser { // Recognize base symbol let symbol = null; if (symbols[this.mode][text]) { - if (this.mode === 'math' && extraLatin.indexOf(text) >= 0 && - !this.settings.unicodeTextInMathMode) { - throw new ParseError(`Unicode text character ${text} used in ` + - `math mode without unicodeTextInMathMode setting`, nucleus); + if (this.settings.strict && this.mode === 'math' && + extraLatin.indexOf(text) >= 0) { + this.settings.nonstrict("unicodeTextInMathMode", + `Latin-1/Unicode text character "${text[0]}" used in ` + + `math mode`, nucleus); } symbol = new ParseNode(symbols[this.mode][text].group, text, this.mode, nucleus); - } else if (supportedCodepoint(text.charCodeAt(0)) && - (this.mode === "text" || this.settings.unicodeTextInMathMode)) { + } else if (supportedCodepoint(text.charCodeAt(0))) { + if (this.settings.strict && this.mode === 'math') { + this.settings.nonstrict("unicodeTextInMathMode", + `Unicode text character "${text[0]}" used in math mode`, + nucleus); + } symbol = new ParseNode("textord", text, this.mode, nucleus); } else { return null; // EOF, ^, _, {, }, etc. diff --git a/src/Settings.js b/src/Settings.js index f7bdc00dbf..e7a406a13c 100644 --- a/src/Settings.js +++ b/src/Settings.js @@ -1,20 +1,28 @@ // @flow +/* eslint no-console:0 */ /** * This is a module for storing settings passed into KaTeX. It correctly handles * default settings. 
*/ import utils from "./utils"; +import ParseError from "./ParseError.js"; +import ParseNode from "./ParseNode"; +import {Token} from "./Token"; import type { MacroMap } from "./macros"; +export type StrictFunction = + (errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) => + ?(boolean | string); + export type SettingsOptions = { displayMode?: boolean; throwOnError?: boolean; errorColor?: string; macros?: MacroMap; colorIsTextColor?: boolean; - unicodeTextInMathMode?: boolean; + strict?: boolean | "ignore" | "warn" | "error" | StrictFunction; maxSize?: number; }; @@ -34,7 +42,7 @@ class Settings { errorColor: string; macros: MacroMap; colorIsTextColor: boolean; - unicodeTextInMathMode: boolean; + strict: boolean | "ignore" | "warn" | "error" | StrictFunction; maxSize: number; constructor(options: SettingsOptions) { @@ -45,10 +53,37 @@ class Settings { this.errorColor = utils.deflt(options.errorColor, "#cc0000"); this.macros = options.macros || {}; this.colorIsTextColor = utils.deflt(options.colorIsTextColor, false); - this.unicodeTextInMathMode = - utils.deflt(options.unicodeTextInMathMode, false); + this.strict = utils.deflt(options.strict, "warn"); this.maxSize = Math.max(0, utils.deflt(options.maxSize, Infinity)); } + + /** + * Report nonstrict (non-LaTeX-compatible) input. + * Can safely not be called if `this.strict` is false in JavaScript. + */ + nonstrict(errorCode: string, errorMsg: string, token?: Token | ParseNode<*>) { + let strict = this.strict; + if (typeof strict === "function") { + // Allow return value of strict function to be boolean or string + // (or null/undefined, meaning no further processing). 
+ strict = strict(errorCode, errorMsg, token); + } + if (!strict || strict === "ignore") { + return; + } else if (strict === true || strict === "error") { + throw new ParseError( + "LaTeX-incompatible input and strict mode is set to 'error': " + + `${errorMsg} [${errorCode}]`, token); + } else if (strict === "warn") { + typeof console !== "undefined" && console.warn( + "LaTeX-incompatible input and strict mode is set to 'warn': " + + `${errorMsg} [${errorCode}]`); + } else { // won't happen in type-safe code + typeof console !== "undefined" && console.warn( + "LaTeX-incompatible input and strict mode is set to " + + `unrecognized '${strict}': ${errorMsg} [${errorCode}]`); + } + } } export default Settings; diff --git a/src/functions/kern.js b/src/functions/kern.js index 4f74e66c59..79a6d25edf 100644 --- a/src/functions/kern.js +++ b/src/functions/kern.js @@ -1,12 +1,10 @@ //@flow -/* eslint no-console:0 */ // Horizontal spacing commands import defineFunction from "../defineFunction"; import buildCommon from "../buildCommon"; import mathMLTree from "../mathMLTree"; import { calculateSize } from "../units"; -import ParseError from "../ParseError"; // TODO: \hskip and \mskip should support plus and minus in lengths @@ -19,22 +17,24 @@ defineFunction({ allowedInText: true, }, handler: (context, args) => { - const mathFunction = (context.funcName[1] === 'm'); // \mkern, \mskip - const muUnit = (args[0].value.unit === 'mu'); - if (mathFunction) { - if (!muUnit) { - typeof console !== "undefined" && console.warn( - `In LaTeX, ${context.funcName} supports only mu units, ` + - `not ${args[0].value.unit} units`); - } - if (context.parser.mode !== "math") { - throw new ParseError( - `Can't use function '${context.funcName}' in text mode`); - } - } else { // !mathFunction - if (muUnit) { - typeof console !== "undefined" && console.warn( - `In LaTeX, ${context.funcName} does not support mu units`); + if (context.parser.settings.strict) { + const mathFunction = 
(context.funcName[1] === 'm'); // \mkern, \mskip + const muUnit = (args[0].value.unit === 'mu'); + if (mathFunction) { + if (!muUnit) { + context.parser.settings.nonstrict("mathVsTextUnits", + `LaTeX's ${context.funcName} supports only mu units, ` + + `not ${args[0].value.unit} units`); + } + if (context.parser.mode !== "math") { + context.parser.settings.nonstrict("mathVsTextUnits", + `LaTeX's ${context.funcName} works only in math mode`); + } + } else { // !mathFunction + if (muUnit) { + context.parser.settings.nonstrict("mathVsTextUnits", + `LaTeX's ${context.funcName} doesn't support mu units`); + } } } return { diff --git a/static/main.js b/static/main.js index 01e72df551..c64a4c887e 100644 --- a/static/main.js +++ b/static/main.js @@ -28,6 +28,17 @@ function init() { options.displayMode = false; } + // Use `strict=warn` for warning strict mode or `strict=error` + // (or `=1`/`=t`/`=true`/`=y`/`=yes`) + // to throw an error on LaTeX-incompatible input. + if (query.strict) { + if (query.strict.match(/^(1|t|y|e)/)) { + options.strict = "error"; + } if (query.strict && query.strict.match(/^(w)/)) { + options.strict = "warn"; + } + } + // The `before` or `pre` search parameter puts normal text before the math. // The `after` or `post` search parameter puts normal text after the math. // Example use: testing baseline alignment. 
diff --git a/test/Warning.js b/test/Warning.js new file mode 100644 index 0000000000..ede85f4d27 --- /dev/null +++ b/test/Warning.js @@ -0,0 +1,20 @@ +// @flow + +class Warning { + name: string; + message: string; + stack: string; + + constructor(message: string) { + // $FlowFixMe + this.name = "Warning"; + // $FlowFixMe + this.message = "Warning: " + message; + // $FlowFixMe + this.stack = new Error().stack; + } +} +// $FlowFixMe +Warning.prototype = Object.create(Error.prototype); + +module.exports = Warning; diff --git a/test/katex-spec.js b/test/katex-spec.js index 81f4d11d56..8865d2663c 100644 --- a/test/katex-spec.js +++ b/test/katex-spec.js @@ -35,7 +35,9 @@ const serializer = { expect.addSnapshotSerializer(serializer); -const defaultSettings = new Settings({}); +const defaultSettings = new Settings({ + strict: false, // deal with warnings only when desired +}); const defaultOptions = new Options({ style: Style.TEXT, size: 5, @@ -2928,24 +2930,23 @@ describe("A parser taking String objects", function() { describe("Unicode accents", function() { it("should parse Latin-1 letters in math mode", function() { - // TODO(edemaine): Unsupported Latin-1 letters in math: ÅåÇÐÞçðþ - expect("ÀÁÂÃÄÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäèéêëìíîïñòóôõöùúûüýÿ") + // TODO(edemaine): Unsupported Latin-1 letters in math: ÇÐÞçðþ + expect("ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ") .toParseLike( - "\\grave A\\acute A\\hat A\\tilde A\\ddot A" + + "\\grave A\\acute A\\hat A\\tilde A\\ddot A\\mathring A" + "\\grave E\\acute E\\hat E\\ddot E" + "\\grave I\\acute I\\hat I\\ddot I" + "\\tilde N" + "\\grave O\\acute O\\hat O\\tilde O\\ddot O" + "\\grave U\\acute U\\hat U\\ddot U" + "\\acute Y" + - "\\grave a\\acute a\\hat a\\tilde a\\ddot a" + + "\\grave a\\acute a\\hat a\\tilde a\\ddot a\\mathring a" + "\\grave e\\acute e\\hat e\\ddot e" + "\\grave ı\\acute ı\\hat ı\\ddot ı" + "\\tilde n" + "\\grave o\\acute o\\hat o\\tilde o\\ddot o" + "\\grave u\\acute u\\hat u\\ddot u" + - "\\acute 
y\\ddot y", - {unicodeTextInMathMode: true}); + "\\acute y\\ddot y"); }); it("should parse Latin-1 letters in text mode", function() { @@ -2970,26 +2971,24 @@ describe("Unicode accents", function() { it("should support \\aa in text mode", function() { expect("\\text{\\aa\\AA}").toParseLike("\\text{\\r a\\r A}"); - expect("\\aa").toNotParse(); - expect("\\Aa").toNotParse(); + expect("\\aa").toNotParse(new Settings({strict: true})); + expect("\\Aa").toNotParse(new Settings({strict: true})); }); it("should parse combining characters", function() { - expect("A\u0301C\u0301").toParseLike("Á\\acute C", - {unicodeTextInMathMode: true}); + expect("A\u0301C\u0301").toParseLike("Á\\acute C"); expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}"); }); it("should parse multi-accented characters", function() { - expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true}); + expect("ấā́ắ\\text{ấā́ắ}").toParse(); // Doesn't parse quite the same as // "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups. 
}); it("should parse accented i's and j's", function() { - expect("íȷ́").toParseLike("\\acute ı\\acute ȷ", - {unicodeTextInMathMode: true}); - expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true}); + expect("íȷ́").toParseLike("\\acute ı\\acute ȷ"); + expect("ấā́ắ\\text{ấā́ắ}").toParse(); }); }); @@ -3154,25 +3153,37 @@ describe("Symbols", function() { }); }); -describe("unicodeTextInMathMode setting", function() { - it("should allow unicode text when true", () => { - expect("é").toParse({unicodeTextInMathMode: true}); - expect("試").toParse({unicodeTextInMathMode: true}); +describe("strict setting", function() { + it("should allow unicode text when not strict", () => { + expect("é").toParse(new Settings({strict: false})); + expect("試").toParse(new Settings({strict: false})); + expect("é").toParse(new Settings({strict: "ignore"})); + expect("試").toParse(new Settings({strict: "ignore"})); + expect("é").toParse(new Settings({strict: () => false})); + expect("試").toParse(new Settings({strict: () => false})); + expect("é").toParse(new Settings({strict: () => "ignore"})); + expect("試").toParse(new Settings({strict: () => "ignore"})); }); - it("should forbid unicode text when false", () => { - expect("é").toNotParse({unicodeTextInMathMode: false}); - expect("試").toNotParse({unicodeTextInMathMode: false}); + it("should forbid unicode text when strict", () => { + expect("é").toNotParse(new Settings({strict: true})); + expect("試").toNotParse(new Settings({strict: true})); + expect("é").toNotParse(new Settings({strict: "error"})); + expect("試").toNotParse(new Settings({strict: "error"})); + expect("é").toNotParse(new Settings({strict: () => true})); + expect("試").toNotParse(new Settings({strict: () => true})); + expect("é").toNotParse(new Settings({strict: () => "error"})); + expect("試").toNotParse(new Settings({strict: () => "error"})); }); - it("should forbid unicode text when default", () => { - expect("é").toNotParse(); - expect("試").toNotParse(); + it("should 
warn about unicode text when default", () => { + expect("é").toWarn(new Settings()); + expect("試").toWarn(new Settings()); }); it("should always allow unicode text in text mode", () => { - expect("\\text{é試}").toParse({unicodeTextInMathMode: false}); - expect("\\text{é試}").toParse({unicodeTextInMathMode: true}); + expect("\\text{é試}").toParse(new Settings({strict: false})); + expect("\\text{é試}").toParse(new Settings({strict: true})); expect("\\text{é試}").toParse(); }); }); diff --git a/test/setup.js b/test/setup.js new file mode 100644 index 0000000000..7a49d84a03 --- /dev/null +++ b/test/setup.js @@ -0,0 +1,41 @@ +/* global jest: false */ +/* global expect: false */ + +import katex from "../katex"; +import Settings from "../src/Settings"; +import Warning from "./Warning"; + +global.console.warn = jest.fn((warning) => { + throw new Warning(warning); +}); + +const defaultSettings = new Settings({ + strict: false, // enable dealing with warnings only when needed +}); + +expect.extend({ + toWarn: function(actual, settings) { + const usedSettings = settings ? 
settings : defaultSettings; + + const result = { + pass: false, + message: () => + `Expected '${actual}' to generate a warning, but it succeeded`, + }; + + try { + katex.__renderToDomTree(actual, usedSettings); + } catch (e) { + if (e instanceof Warning) { + result.pass = true; + result.message = () => + `'${actual}' correctly generated warning: ${e.message}`; + } else { + result.message = () => + `'${actual}' failed building with unknown error: ${e.message}`; + } + } + + return result; + }, +}); diff --git a/test/unicode-spec.js b/test/unicode-spec.js index a21af06fd3..0ece694ec9 100644 --- a/test/unicode-spec.js +++ b/test/unicode-spec.js @@ -8,7 +8,10 @@ import parseTree from "../src/parseTree"; import Settings from "../src/Settings"; import {scriptFromCodepoint, supportedCodepoint} from "../src/unicodeScripts"; -const defaultSettings = new Settings({}); +const defaultSettings = new Settings({ + strict: false, // deal with warnings only when desired +}); +const strictSettings = new Settings({strict: true}); const parseAndSetResult = function(expr, result, settings) { try { @@ -72,16 +75,16 @@ describe("unicode", function() { 'ÆÇÐØÞßæçðøþ}').toParse(); }); - it("should not parse Latin-1 outside \\text{} without setting", function() { + it("should not parse Latin-1 outside \\text{} with strict", function() { const chars = 'ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿÇÐÞçþ'; for (const ch of chars) { - expect(ch).toNotParse(); + expect(ch).toNotParse(strictSettings); } }); it("should parse Latin-1 outside \\text{}", function() { expect('ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ' + - 'ÇÐÞçðþ').toParse({unicodeTextInMathMode: true}); + 'ÇÐÞçðþ').toParse(); }); it("should parse all lower case Greek letters", function() { @@ -96,8 +99,8 @@ describe("unicode", function() { expect('\\text{БГДЖЗЙЛФЦШЫЮЯ}').toParse(); }); - it("should not parse Cyrillic outside \\text{}", function() { - expect('БГДЖЗЙЛФЦШЫЮЯ').toNotParse(); + it("should not parse Cyrillic 
outside \\text{} with strict", function() { + expect('БГДЖЗЙЛФЦШЫЮЯ').toNotParse(strictSettings); }); it("should parse CJK inside \\text{}", function() { @@ -105,33 +108,33 @@ describe("unicode", function() { expect('\\text{여보세요}').toParse(); }); - it("should not parse CJK outside \\text{}", function() { - expect('私はバナナです。').toNotParse(); - expect('여보세요').toNotParse(); + it("should not parse CJK outside \\text{} with strict", function() { + expect('私はバナナです。').toNotParse(strictSettings); + expect('여보세요').toNotParse(strictSettings); }); it("should parse Devangari inside \\text{}", function() { expect('\\text{नमस्ते}').toParse(); }); - it("should not parse Devangari outside \\text{}", function() { - expect('नमस्ते').toNotParse(); + it("should not parse Devangari outside \\text{} with strict", function() { + expect('नमस्ते').toNotParse(strictSettings); }); it("should parse Georgian inside \\text{}", function() { expect('\\text{გამარჯობა}').toParse(); }); - it("should not parse Georgian outside \\text{}", function() { - expect('გამარჯობა').toNotParse(); + it("should not parse Georgian outside \\text{} with strict", function() { + expect('გამარჯობა').toNotParse(strictSettings); }); it("should parse extended Latin characters inside \\text{}", function() { expect('\\text{ěščřžůřťďňőİı}').toParse(); }); - it("should not parse extended Latin outside \\text{}", function() { - expect('ěščřžůřťďňőİı').toNotParse(); + it("should not parse extended Latin outside \\text{} with strict", function() { + expect('ěščřžůřťďňőİı').toNotParse(strictSettings); }); });