JSONC-org · DecimalTurn · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,6 @@ _site
 .jekyll-metadata
 vendor
 Gemfile.lock
+node_modules
+local
+grammar/jsonc-processed.abnf
diff --git a/.gitmodules b/.gitmodules
@@ -0,0 +1,4 @@
+[submodule "submodules/railroad-diagram-generator-js"]
+	path = submodules/railroad-diagram-generator-js
+	url = https://github.com/DecimalTurn/railroad-diagram-generator-js
+	branch = dev
diff --git a/assets/examples/many_comments.jsonc b/assets/examples/many_comments.jsonc
@@ -0,0 +1,14 @@
+//Comment
+/*Comment*/
+/*Comment*/{//Comment
+
+/*Comment*/"key1" /*Comment*/:/*Comment*/"value1"/*Comment*/,/*Comment*/
+
+"key2" //Comment
+: //Comment
+"value2" //Comment
+, //Comment
+
+"array1": [/*Comment*/"value1"/*Comment*/,/*Comment*/"value2",/*Comment*/1/*Comment*/]
+
+}
diff --git a/generate-railroad.js b/generate-railroad.js
@@ -0,0 +1,176 @@
+#!/usr/bin/env node
+
+const fs = require("node:fs");
+const { spawnSync } = require("node:child_process");
+const path = require("node:path");
+
+// Customization section: default paths, each resolved against this script's directory.
+const DEFAULT_INPUT_ABNF = "grammar/jsonc.abnf"; // grammar read when no input argument is given
+const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf"; // where the pre-processed grammar is written
+const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html"; // diagram page written when no output argument is given
+
+// Rules to inline from their %x... definitions as literal ABNF strings (processed in list order).
+// Add more rule names here to apply the same transformation; each must be defined in the grammar.
+const INLINE_HEX_RULES = [
+  "multi-line-comment-start",
+  "multi-line-comment-end",
+  "asterisk",
+  "escape"
+];
+
+function escapeRegExp(value) {
+  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+function decodeAbnfHexSequence(value) {
+  const trimmed = value.trim();
+  if (!/^%x[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]+)*$/.test(trimmed)) {
+    throw new Error(`Unsupported ABNF hex sequence: ${value}`);
+  }
+
+  const bytes = trimmed
+    .slice(2)
+    .split(".")
+    .map((part) => parseInt(part, 16));
+
+  return String.fromCodePoint(...bytes);
+}
+
+function inlineHexRuleAsLiteral(source, ruleName) {
+  const escapedRuleName = escapeRegExp(ruleName);
+  const ruleRegex = new RegExp(
+    `^\\s*${escapedRuleName}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)\\b.*$`,
+    "m",
+  );
+  const ruleMatch = source.match(ruleRegex);
+  if (!ruleMatch) {
+    throw new Error(`Rule ${ruleName} was not found.`);
+  }
+
+  const hexSequence = ruleMatch[1];
+  const literalChars = decodeAbnfHexSequence(hexSequence);
+
+  // For backslash or other problematic characters, keep them as hex format
+  // ABNF doesn't support backslash escaping in quoted strings
+  let replacement;
+  if (literalChars === "\\") {
+    replacement = hexSequence;
+  } else {
+    // For other characters, escape only double quotes (not backslashes)
+    const escapedLiteralChars = literalChars.replace(/"/g, '\\"');
+    replacement = `"${escapedLiteralChars}"`;
+  }
+
+  const removeRuleRegex = new RegExp(`^\\s*${escapedRuleName}\\s*=.*(?:\\r?\\n|$)`, "m");
+  const withoutRule = source.replace(removeRuleRegex, "");
+
+  const useRuleRegex = new RegExp(
+    `(?<![A-Za-z0-9-])${escapedRuleName}(?![A-Za-z0-9-])`,
+    "g",
+  );
+
+  // Replace only grammar expressions: RHS after '=' or continuation lines.
+  return withoutRule
+    .split(/\r?\n/)
+    .map((line) => {
+      const eqIndex = line.indexOf("=");
+
+      if (eqIndex !== -1) {
+        const lhs = line.slice(0, eqIndex + 1);
+        const rhs = line.slice(eqIndex + 1).replace(useRuleRegex, replacement);
+        return `${lhs}${rhs}`;
+      }
+
+      if (/^\s/.test(line)) {
+        return line.replace(useRuleRegex, replacement);
+      }
+
+      return line;
+    })
+    .join("\n");
+}
+
+function processAbnfSource(source) {
+  let processed = source;
+
+  for (const ruleName of INLINE_HEX_RULES) {
+    processed = inlineHexRuleAsLiteral(processed, ruleName);
+  }
+
+  return processed;
+}
+
+const args = process.argv.slice(2);
+const titleIndex = args.indexOf("--title");
+
+let title;
+if (titleIndex !== -1) {
+  if (titleIndex + 1 >= args.length) {
+    console.error("Missing value for --title");
+    process.exit(1);
+  }
+  title = args[titleIndex + 1];
+  args.splice(titleIndex, 2);
+}
+
+const input = args[0] || DEFAULT_INPUT_ABNF;
+const output = args[1] || DEFAULT_OUTPUT_HTML;
+const processedAbnf = DEFAULT_PROCESSED_ABNF;
+
+const inputPath = path.resolve(__dirname, input);
+const outputPath = path.resolve(__dirname, output);
+const processedPath = path.resolve(__dirname, processedAbnf);
+
+let source;
+try {
+  source = fs.readFileSync(inputPath, "utf8");
+} catch (error) {
+  console.error(`Failed to read input ABNF: ${error.message}`);
+  process.exit(1);
+}
+
+let processed;
+try {
+  processed = processAbnfSource(source);
+} catch (error) {
+  console.error(`Failed to process ABNF source: ${error.message}`);
+  process.exit(1);
+}
+
+if (typeof processed !== "string") {
+  console.error("Failed to process ABNF source: processAbnfSource must return a string.");
+  process.exit(1);
+}
+
+try {
+  fs.mkdirSync(path.dirname(processedPath), { recursive: true });
+  fs.writeFileSync(processedPath, processed, "utf8");
+} catch (error) {
+  console.error(`Failed to write processed ABNF: ${error.message}`);
+  process.exit(1);
+}
+
+const cliPath = path.join(
+  __dirname,
+  "node_modules",
+  "railroad-diagram-generator-js",
+  "bin",
+  "cli.js",
+);
+
+const cliArgs = [cliPath, "generate", processedPath, outputPath];
+if (title) {
+  cliArgs.push("--title", title);
+}
+
+const result = spawnSync(process.execPath, cliArgs, {
+  cwd: __dirname,
+  stdio: "inherit",
+});
+
+if (result.error) {
+  console.error(`Failed to run railroad generator: ${result.error.message}`);
+  process.exit(1);
+}
+
+process.exit(result.status === null ? 1 : result.status);
diff --git a/grammar/README.md b/grammar/README.md
@@ -0,0 +1,73 @@
+# JSONC Grammar directory
+
+This directory contains the ABNF grammar for JSONC, along with plans for generating railroad diagrams from it.
+
+## Railroad Diagram Generation Plan
+
+Generate railroad diagrams from `grammar/jsonc.abnf` using a simple one-file Node.js script.
+
+Instead of building a custom ABNF parser and converter to Tab Atkins constructor calls, use:
+
+- https://github.com/xtofs/railroad-diagram-generator.js
+
+This tool parses ABNF directly and generates static HTML with embedded SVG diagrams.
+
+### Script behavior
+
+The wrapper script should:
+
+1. Accept an optional input ABNF path and an optional output HTML path.
+2. Default to:
+   - input: `grammar/jsonc.abnf`
+   - output: `grammar/railroad-diagram.html`
+3. Optionally accept `--title` to set the HTML title.
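+4. Pre-process the grammar (inline selected `%x…` rules as literals, written to `grammar/jsonc-processed.abnf`), then execute the upstream CLI from our installed dependency on the result.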
+5. Exit non-zero on error and print useful diagnostics.
+
+### Command examples
+
+Initialize submodule(s):
+
+```bash
+git submodule update --init --recursive
+```
+
+Install dependencies:
+
+```bash
+npm install
+```
+
+The dependency is sourced from disk via:
+
+```json
+"railroad-diagram-generator-js": "file:./submodules/railroad-diagram-generator-js"
+```
+
+Generate with defaults:
+
+```bash
+npm run railroad
+```
+
+Generate from a specific input and output:
+
+```bash
+npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html
+```
+
+Generate with a custom title:
+
+```bash
+npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
+```
+
+### Notes on EOF for single-line comments
+
+The grammar already allows single-line comments to terminate at end-of-file because the line terminator is optional:
+
+```abnf
+single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
+```
+
+So diagrams generated from this ABNF should not imply a mandatory line ending.
diff --git a/grammar/jsonc.abnf b/grammar/jsonc.abnf
@@ -0,0 +1,108 @@
+; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments)
+;
+; Notes:
+; - Rule names and structure follow RFC 8259 ABNF snippets.
+; - DIGIT and HEXDIG are core rules from RFC 5234.
+; - Comments are an extension that is not part of RFC 8259.
+; - Trailing commas are NOT supported in this grammar.
+
+; A JSONC-text is a serialized value surrounded by optional whitespace and comments.
+; Comments can appear anywhere insignificant whitespace is allowed in JSON.
+JSONC-text = wsc value wsc
+
+; Whitespace with Comments: zero or more whitespace characters or comments
+wsc = *(ws-char / comment)
+
+; Single whitespace character (space, tab, line feed, carriage return)
+ws-char = %x20 / %x09 / %x0A / %x0D  ; space / tab / LF / CR
+
+; Comments: single-line or multi-line
+comment = single-line-comment / multi-line-comment
+
+; Source character: any Unicode code point, as per ECMAScript.
+source-character = %x00-10FFFF
+
+; Comment terminators and sequences (based on ECMAScript line terminators)
+comment-terminator = %x0A / %x0D / %x2028 / %x2029          ; LF / CR / LS / PS
+comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029 
+
+; Single-line comment: starts with //, continues until line ending
+; Terminator is not part of the comment body.
+; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator.
+single-line-comment-start = %x2F.2F             ; // double solidus
+single-line-comment-end = comment-terminator-sequence
+single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
+single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators
+
+; Multi-line comment: /* ... */
+; Cannot be nested. The first */ closes the comment.
+; Any source character is allowed inside, including control characters,
+; except the closing delimiter sequence "*/".
+multi-line-comment-start = %x2F.2A             ; /* slash-asterisk
+multi-line-comment-end = %x2A.2F               ; */ asterisk-slash
+asterisk = %x2A                                ; * asterisk character
+multi-line-comment = multi-line-comment-start [ multi-line-comment-chars ] multi-line-comment-end
+multi-line-comment-chars = not-asterisk-char [ multi-line-comment-chars ] /
+                           asterisk [ post-asterisk-comment-chars ]
+post-asterisk-comment-chars = not-forward-slash-or-asterisk-char [ multi-line-comment-chars ] /
+                              asterisk [ post-asterisk-comment-chars ]
+not-asterisk-char = %x00-29 / %x2B-10FFFF
+not-forward-slash-or-asterisk-char = %x00-29 / %x2B-2E / %x30-10FFFF
+
+; Structural characters with surrounding optional whitespace and comments
+begin-array     = wsc %x5B wsc  ; [ left square bracket
+begin-object    = wsc %x7B wsc  ; { left curly bracket
+end-array       = wsc %x5D wsc  ; ] right square bracket
+end-object      = wsc %x7D wsc  ; } right curly bracket
+name-separator  = wsc %x3A wsc  ; : colon
+value-separator = wsc %x2C wsc  ; , comma
+
+; Any JSON value
+value = false / null / true / object / array / number / string
+
+; Literal names (boolean values and null)
+false = %x66.61.6C.73.65   ; false
+true  = %x74.72.75.65      ; true
+null  = %x6E.75.6C.6C      ; null
+
+; Objects
+object = begin-object [ member *( value-separator member ) ] end-object
+member = string name-separator value
+
+; Arrays
+array = begin-array [ value *( value-separator value ) ] end-array
+
+; Numbers
+number = [ minus ] int [ frac ] [ exp ]
+decimal-point = %x2E        ; .
+digit1-9 = %x31-39          ; 1-9
+e = %x65 / %x45             ; e E
+exp = e [ minus / plus ] 1*DIGIT
+frac = decimal-point 1*DIGIT
+int = zero / ( digit1-9 *DIGIT )
+minus = %x2D                ; -
+plus = %x2B                 ; +
+zero = %x30                 ; 0
+
+; Strings
+string = quotation-mark *char quotation-mark
+
+char = unescaped /
+ escape (
+ %x22 /             ; "    quotation mark  U+0022
+ %x5C /             ; \    reverse solidus U+005C
+ %x2F /             ; /    solidus         U+002F
+ %x62 /             ; b    backspace       U+0008
+ %x66 /             ; f    form feed       U+000C
+ %x6E /             ; n    line feed       U+000A
+ %x72 /             ; r    carriage return U+000D
+ %x74 /             ; t    tab             U+0009
+ %x75 4HEXDIG       ; uXXXX                U+XXXX
+ )
+
+escape = %x5C               ; \
+quotation-mark = %x22       ; "
+
+unescaped = %x20-21 / %x23-5B / %x5D-10FFFF       ; Any code point except quotation mark, reverse solidus or ASCII control chars 
+
+; End of JSONC grammar (RFC 8259 extended with JavaScript-style comments).