KaTeX · edemaine · May 13, 2018 · Apr 30, 2018 · May 4, 2018 · May 4, 2018
diff --git a/README.md b/README.md
@@ -72,8 +72,10 @@ You can provide an object of options as the last argument to `katex.render` and
 - `errorColor`: `string`. A color string given in the format `"#XXX"` or `"#XXXXXX"`. This option determines the color which unsupported commands are rendered in. (default: `#cc0000`)
 - `macros`: `object`. A collection of custom macros. Each macro is a property with a name like `\name` (written `"\\name"` in JavaScript) which maps to a string that describes the expansion of the macro. Single-character keys can also be included in which case the character will be redefined as the given macro (similar to TeX active characters).
 - `colorIsTextColor`: `boolean`. If `true`, `\color` will work like LaTeX's `\textcolor`, and take two arguments (e.g., `\color{blue}{hello}`), which restores the old behavior of KaTeX (pre-0.8.0). If `false` (the default), `\color` will work like LaTeX's `\color`, and take one argument (e.g., `\color{blue}hello`).  In both cases, `\textcolor` works as in LaTeX (e.g., `\textcolor{blue}{hello}`).
-- `unicodeTextInMathMode`: `boolean`. If `true`, supported unicode text characters like `é` and `試` will also work in math mode. (They always work in text mode.) The default is `false`, which matches XeTeX behavior; `true` emulates MathJax behavior.
 - `maxSize`: `number`. If non-zero, all user-specified sizes, e.g. in `\rule{500em}{500em}`, will be capped to `maxSize` ems. Otherwise, users can make elements and spaces arbitrarily large (the default behavior).
+- `strict`: `boolean` or `string` or `function` (default: `false`). If `false` or `"ignore"`, allow features that make writing LaTeX convenient but are not actually supported by (Xe)LaTeX (similar to MathJax). If `true` or `"error"` (LaTeX faithfulness mode), throw an error for any such transgressions. If `"warn"`, warn about such behavior via `console.warn`. Provide a custom function `handler(errorCode, errorMsg, token)` to customize behavior depending on the type of transgression (summarized by the string code `errorCode` and detailed in `errorMsg`); this function can also return `"ignore"`, `"error"`, or `"warn"` to use a built-in behavior.  A list of such features and their `errorCode`s:
+  - `"unicodeTextInMathMode"`: Use of Unicode text characters in math mode.
+  - `"mathVsTextUnits"`: Mismatch of math vs. text commands and units/mode.
 
 For example:
 
@@ -126,9 +128,6 @@ will appear larger than 1cm in browser units.
 - MathJax defines `\color` to be like `\textcolor` by default; set KaTeX's
   `colorIsTextColor` option to `true` for this behavior.  KaTeX's default
   behavior matches MathJax with its `color.js` extension enabled.
-- MathJax supports Unicode text characters in math mode, unlike LaTeX.
-  To support this behavior in KaTeX, set the `unicodeTextInMathMode` option
-  to `true`.
 
 ## Libraries
 

diff --git a/src/Parser.js b/src/Parser.js
@@ -939,12 +939,15 @@ export default class Parser {
                 }, "text"), nucleus);
         }
         // At this point, we should have a symbol, possibly with accents.
-        // First expand any accented base symbol according to unicodeSymbols,
-        // unless we're in math mode and unicodeTextInMathMode is false
-        // (XeTeX-compatible mode).
+        // First expand any accented base symbol according to unicodeSymbols.
         if (unicodeSymbols.hasOwnProperty(text[0]) &&
-            !symbols[this.mode][text[0]] &&
-            (this.settings.unicodeTextInMathMode || this.mode === "text")) {
+            !symbols[this.mode][text[0]]) {
+            // This behavior is not strict (XeTeX-compatible) in math mode.
+            if (this.settings.strict && this.mode === "math") {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Accented Unicode text character "${text[0]}" used in ` +
+                    `math mode`, nucleus);
+            }
             text = unicodeSymbols[text[0]] + text.substr(1);
         }
         // Strip off any combining characters
@@ -960,15 +963,20 @@ export default class Parser {
         // Recognize base symbol
         let symbol = null;
         if (symbols[this.mode][text]) {
-            if (this.mode === 'math' && extraLatin.indexOf(text) >= 0 &&
-                !this.settings.unicodeTextInMathMode) {
-                throw new ParseError(`Unicode text character ${text} used in ` +
-                    `math mode without unicodeTextInMathMode setting`, nucleus);
+            if (this.settings.strict && this.mode === 'math' &&
+                extraLatin.indexOf(text) >= 0) {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Latin-1/Unicode text character "${text[0]}" used in ` +
+                    `math mode`, nucleus);
             }
             symbol = new ParseNode(symbols[this.mode][text].group,
                             text, this.mode, nucleus);
-        } else if (supportedCodepoint(text.charCodeAt(0)) &&
-            (this.mode === "text" || this.settings.unicodeTextInMathMode)) {
+        } else if (supportedCodepoint(text.charCodeAt(0))) {
+            if (this.settings.strict && this.mode === 'math') {
+                this.settings.nonstrict("unicodeTextInMathMode",
+                    `Unicode text character "${text[0]}" used in math mode`,
+                    nucleus);
+            }
             symbol = new ParseNode("textord", text, this.mode, nucleus);
         } else {
             return null;  // EOF, ^, _, {, }, etc.

diff --git a/src/Settings.js b/src/Settings.js
@@ -1,20 +1,28 @@
 // @flow
+/* eslint no-console:0 */
 /**
  * This is a module for storing settings passed into KaTeX. It correctly handles
  * default settings.
  */
 
 import utils from "./utils";
+import ParseError from "./ParseError.js";
+import ParseNode from "./ParseNode";
+import {Token} from "./Token";
 
 import type { MacroMap } from "./macros";
 
+export type StrictFunction =
+    (errorCode: string, errorMsg: string, token?: Token | ParseNode) =>
+    ?(boolean | string);
+
 export type SettingsOptions = {
     displayMode?: boolean;
     throwOnError?: boolean;
     errorColor?: string;
     macros?: MacroMap;
     colorIsTextColor?: boolean;
-    unicodeTextInMathMode?: boolean;
+    strict?: boolean | "ignore" | "warn" | "error" | StrictFunction;
     maxSize?: number;
 };
 
@@ -34,7 +42,7 @@ class Settings {
     errorColor: string;
     macros: MacroMap;
     colorIsTextColor: boolean;
-    unicodeTextInMathMode: boolean;
+    strict: boolean | "ignore" | "warn" | "error" | StrictFunction;
     maxSize: number;
 
     constructor(options: SettingsOptions) {
@@ -45,10 +53,37 @@ class Settings {
         this.errorColor = utils.deflt(options.errorColor, "#cc0000");
         this.macros = options.macros || {};
         this.colorIsTextColor = utils.deflt(options.colorIsTextColor, false);
-        this.unicodeTextInMathMode =
-            utils.deflt(options.unicodeTextInMathMode, false);
+        this.strict = utils.deflt(options.strict, false);
         this.maxSize = Math.max(0, utils.deflt(options.maxSize, Infinity));
     }
+
+    /**
+     * Report nonstrict (non-LaTeX-compatible) input per the `strict` setting.
+     * Callers may skip this call when `this.strict` is falsy (it is a no-op).
+     */
+    nonstrict(errorCode: string, errorMsg: string, token?: Token | ParseNode) {
+        let strict = this.strict;
+        if (typeof strict === "function") {
+            // Allow return value of strict function to be boolean or string
+            // (or null/undefined, meaning no further processing).
+            strict = strict(errorCode, errorMsg, token);
+        }
+        if (!strict || strict === "ignore") {
+            return;
+        } else if (strict === true || strict === "error") {
+            throw new ParseError(
+                "LaTeX-incompatible input and strict mode is set to 'error': " +
+                `${errorMsg} [${errorCode}]`, token);
+        } else if (strict === "warn") {
+            typeof console !== "undefined" && console.warn(
+                "LaTeX-incompatible input and strict mode is set to 'warn': " +
+                `${errorMsg} [${errorCode}]`);
+        } else {  // won't happen in type-safe code
+            typeof console !== "undefined" && console.warn(
+                "LaTeX-incompatible input and strict mode is set to " +
+                `unrecognized '${strict}': ${errorMsg} [${errorCode}]`);
+        }
+    }
 }
 
 export default Settings;
diff --git a/src/functions/kern.js b/src/functions/kern.js
@@ -1,12 +1,10 @@
 //@flow
-/* eslint no-console:0 */
 // Horizontal spacing commands
 
 import defineFunction from "../defineFunction";
 import buildCommon from "../buildCommon";
 import mathMLTree from "../mathMLTree";
 import { calculateSize } from "../units";
-import ParseError from "../ParseError";
 
 // TODO: \hskip and \mskip should support plus and minus in lengths
 
@@ -19,22 +17,24 @@ defineFunction({
         allowedInText: true,
     },
     handler: (context, args) => {
-        const mathFunction = (context.funcName[1] === 'm');  // \mkern, \mskip
-        const muUnit = (args[0].value.unit === 'mu');
-        if (mathFunction) {
-            if (!muUnit) {
-                typeof console !== "undefined" && console.warn(
-                    `In LaTeX, ${context.funcName} supports only mu units, ` +
-                    `not ${args[0].value.unit} units`);
-            }
-            if (context.parser.mode !== "math") {
-                throw new ParseError(
-                    `Can't use function '${context.funcName}' in text mode`);
-            }
-        } else {  // !mathFunction
-            if (muUnit) {
-                typeof console !== "undefined" && console.warn(
-                    `In LaTeX, ${context.funcName} does not support mu units`);
+        if (context.parser.settings.strict) {
+            const mathFunction = (context.funcName[1] === 'm');  // \mkern, \mskip
+            const muUnit = (args[0].value.unit === 'mu');
+            if (mathFunction) {
+                if (!muUnit) {
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} supports only mu units, ` +
+                        `not ${args[0].value.unit} units`);
+                }
+                if (context.parser.mode !== "math") {
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} works only in math mode`);
+                }
+            } else {  // !mathFunction
+                if (muUnit) {
+                    context.parser.settings.nonstrict("mathVsTextUnits",
+                        `LaTeX's ${context.funcName} doesn't support mu units`);
+                }
             }
         }
         return {

diff --git a/static/main.js b/static/main.js
@@ -28,6 +28,17 @@ function init() {
         options.displayMode = false;
     }
 
+    // Use `strict=warn` for warning strict mode or `strict=error`
+    // (or `=1`/`=t`/`=true`/`=y`/`=yes`)
+    // to turn off displayMode (which is on by default).
+    if (query.strict) {
+        if (query.strict.match(/^(1|t|y|e)/)) {
+            options.strict = "error";
+        } if (query.strict && query.strict.match(/^(w)/)) {
+            options.strict = "warn";
+        }
+    }
+
     // The `before` or `pre` search parameter puts normal text before the math.
     // The `after` or `post` search parameter puts normal text after the math.
     // Example use: testing baseline alignment.

diff --git a/test/katex-spec.js b/test/katex-spec.js
@@ -2903,24 +2903,23 @@ describe("A parser taking String objects", function() {
 
 describe("Unicode accents", function() {
     it("should parse Latin-1 letters in math mode", function() {
-        // TODO(edemaine): Unsupported Latin-1 letters in math: ÅåÇÐÞçðþ
-        expect("ÀÁÂÃÄÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäèéêëìíîïñòóôõöùúûüýÿ")
+        // TODO(edemaine): Unsupported Latin-1 letters in math: ÇÐÞçðþ
+        expect("ÀÁÂÃÄÅÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåèéêëìíîïñòóôõöùúûüýÿ")
         .toParseLike(
-            "\\grave A\\acute A\\hat A\\tilde A\\ddot A" +
+            "\\grave A\\acute A\\hat A\\tilde A\\ddot A\\mathring A" +
             "\\grave E\\acute E\\hat E\\ddot E" +
             "\\grave I\\acute I\\hat I\\ddot I" +
             "\\tilde N" +
             "\\grave O\\acute O\\hat O\\tilde O\\ddot O" +
             "\\grave U\\acute U\\hat U\\ddot U" +
             "\\acute Y" +
-            "\\grave a\\acute a\\hat a\\tilde a\\ddot a" +
+            "\\grave a\\acute a\\hat a\\tilde a\\ddot a\\mathring a" +
             "\\grave e\\acute e\\hat e\\ddot e" +
             "\\grave ı\\acute ı\\hat ı\\ddot ı" +
             "\\tilde n" +
             "\\grave o\\acute o\\hat o\\tilde o\\ddot o" +
             "\\grave u\\acute u\\hat u\\ddot u" +
-            "\\acute y\\ddot y",
-            {unicodeTextInMathMode: true});
+            "\\acute y\\ddot y");
     });
 
     it("should parse Latin-1 letters in text mode", function() {
@@ -2945,26 +2944,24 @@ describe("Unicode accents", function() {
 
     it("should support \\aa in text mode", function() {
         expect("\\text{\\aa\\AA}").toParseLike("\\text{\\r a\\r A}");
-        expect("\\aa").toNotParse();
-        expect("\\Aa").toNotParse();
+        expect("\\aa").toNotParse(new Settings({strict: true}));
+        expect("\\Aa").toNotParse(new Settings({strict: true}));
     });
 
     it("should parse combining characters", function() {
-        expect("A\u0301C\u0301").toParseLike("Á\\acute C",
-            {unicodeTextInMathMode: true});
+        expect("A\u0301C\u0301").toParseLike("Á\\acute C");
         expect("\\text{A\u0301C\u0301}").toParseLike("\\text{Á\\'C}");
     });
 
     it("should parse multi-accented characters", function() {
-        expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
+        expect("ấā́ắ\\text{ấā́ắ}").toParse();
         // Doesn't parse quite the same as
         // "\\text{\\'{\\^a}\\'{\\=a}\\'{\\u a}}" because of the ordgroups.
     });
 
     it("should parse accented i's and j's", function() {
-        expect("íȷ́").toParseLike("\\acute ı\\acute ȷ",
-            {unicodeTextInMathMode: true});
-        expect("ấā́ắ\\text{ấā́ắ}").toParse({unicodeTextInMathMode: true});
+        expect("íȷ́").toParseLike("\\acute ı\\acute ȷ");
+        expect("ấā́ắ\\text{ấā́ắ}").toParse();
     });
 });
 
@@ -3117,25 +3114,37 @@ describe("Symbols", function() {
     });
 });
 
-describe("unicodeTextInMathMode setting", function() {
-    it("should allow unicode text when true", () => {
-        expect("é").toParse({unicodeTextInMathMode: true});
-        expect("試").toParse({unicodeTextInMathMode: true});
+describe("strict setting", function() {
+    it("should allow unicode text when not strict", () => {
+        expect("é").toParse(new Settings({strict: false}));
+        expect("試").toParse(new Settings({strict: false}));
+        expect("é").toParse(new Settings({strict: "ignore"}));
+        expect("試").toParse(new Settings({strict: "ignore"}));
+        expect("é").toParse(new Settings({strict: () => false}));
+        expect("試").toParse(new Settings({strict: () => false}));
+        expect("é").toParse(new Settings({strict: () => "ignore"}));
+        expect("試").toParse(new Settings({strict: () => "ignore"}));
     });
 
-    it("should forbid unicode text when false", () => {
-        expect("é").toNotParse({unicodeTextInMathMode: false});
-        expect("試").toNotParse({unicodeTextInMathMode: false});
+    it("should forbid unicode text when strict", () => {
+        expect("é").toNotParse(new Settings({strict: true}));
+        expect("試").toNotParse(new Settings({strict: true}));
+        expect("é").toNotParse(new Settings({strict: "error"}));
+        expect("試").toNotParse(new Settings({strict: "error"}));
+        expect("é").toNotParse(new Settings({strict: () => true}));
+        expect("試").toNotParse(new Settings({strict: () => true}));
+        expect("é").toNotParse(new Settings({strict: () => "error"}));
+        expect("試").toNotParse(new Settings({strict: () => "error"}));
     });
 
-    it("should forbid unicode text when default", () => {
-        expect("é").toNotParse();
-        expect("試").toNotParse();
+    it("should allow unicode text when default", () => {
+        expect("é").toParse();
+        expect("試").toParse();
     });
 
     it("should always allow unicode text in text mode", () => {
-        expect("\\text{é試}").toParse({unicodeTextInMathMode: false});
-        expect("\\text{é試}").toParse({unicodeTextInMathMode: true});
+        expect("\\text{é試}").toParse(new Settings({strict: false}));
+        expect("\\text{é試}").toParse(new Settings({strict: true}));
         expect("\\text{é試}").toParse();
     });
 });