Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,6 @@ _site
.jekyll-metadata
vendor
Gemfile.lock
node_modules
local
grammar/jsonc-processed.abnf
4 changes: 4 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[submodule "submodules/railroad-diagram-generator-js"]
path = submodules/railroad-diagram-generator-js
url = https://github.com/DecimalTurn/railroad-diagram-generator-js
branch = dev
14 changes: 14 additions & 0 deletions assets/examples/many_comments.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//Comment
/*Comment*/
/*Comment*/{//Comment

/*Comment*/"key1" /*Comment*/:/*Comment*/"value1"/*Comment*/,/*Comment*/

"key2" //Comment
: //Comment
"value2" //Comment
, //Comment

"array1": [/*Comment*/"value1"/*Comment*/,/*Comment*/"value2",/*Comment*/1/*Comment*/]

}
176 changes: 176 additions & 0 deletions generate-railroad.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
#!/usr/bin/env node

const fs = require("node:fs");
const { spawnSync } = require("node:child_process");
const path = require("node:path");

// Customization section
// All three paths are relative; the script resolves them against its own
// directory (__dirname) before use.
// Grammar read when no input path is given as the first positional argument.
const DEFAULT_INPUT_ABNF = "grammar/jsonc.abnf";
// Where the pre-processed grammar is written before it is handed to the
// railroad generator. This path is always used; no CLI argument overrides it.
const DEFAULT_PROCESSED_ABNF = "grammar/jsonc-processed.abnf";
// HTML file produced when no output path is given as the second positional argument.
const DEFAULT_OUTPUT_HTML = "grammar/railroad-diagram.html";

// Rules to inline from their %x... definitions as literal ABNF strings.
// Add more rule names here to apply the same transformation.
// The rules are processed in the order listed, and every name must be defined
// in the input grammar as `name = %x..`; a missing rule makes processing throw.
// A rule whose value is a lone backslash is substituted in its %x form rather
// than as a quoted string.
const INLINE_HEX_RULES = [
"multi-line-comment-start",
"multi-line-comment-end",
"asterisk",
"escape"
];

/**
 * Backslash-escape every regular-expression metacharacter in a string so the
 * result can be embedded in a `RegExp` pattern and match the text literally.
 *
 * @param {string} value - Text to embed in a pattern.
 * @returns {string} `value` with each metacharacter prefixed by a backslash.
 */
function escapeRegExp(value) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (metaChar) => `\\${metaChar}`);
}

/**
 * Decode an ABNF hex terminal such as `%x2F.2A` into the string it denotes.
 *
 * Only the dotted concatenation form is accepted (`%xHH` or `%xHH.HH...`);
 * ranges such as `%x30-39` are rejected. Surrounding whitespace is ignored.
 *
 * @param {string} value - The ABNF hex sequence.
 * @returns {string} One character per dot-separated hex number, in order.
 * @throws {Error} If `value` is not a dotted `%x` sequence.
 * @throws {RangeError} If a number is not a valid Unicode code point.
 */
function decodeAbnfHexSequence(value) {
  const candidate = value.trim();
  const isHexSequence = /^%x[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]+)*$/.test(candidate);
  if (!isHexSequence) {
    throw new Error(`Unsupported ABNF hex sequence: ${value}`);
  }

  // Drop the leading "%x", then turn each dot-separated number into a character.
  let decoded = "";
  for (const hexDigits of candidate.substring(2).split(".")) {
    decoded += String.fromCodePoint(Number.parseInt(hexDigits, 16));
  }
  return decoded;
}

/**
 * Inline a rule that is defined by an ABNF hex sequence into every place it
 * is referenced, and drop the rule's own definition line.
 *
 * References are replaced by a quoted literal (for example `"*"`) when the
 * decoded text can be written as an ABNF quoted string, and by the original
 * `%x..` sequence otherwise. ABNF quoted strings have no escape mechanism and
 * may only hold %x20-21 / %x23-7E, so a double quote, a control character or
 * a non-ASCII character is never quoted. A backslash is kept in hex form as
 * well.
 *
 * NOTE: ABNF quoted strings match case-insensitively (RFC 5234), so inlining
 * a rule whose value contains letters loosens the grammar.
 *
 * Only grammar expressions are rewritten: the text after the first `=` on a
 * line, and continuation lines (lines that start with whitespace). Line
 * endings in the result are normalized to "\n".
 *
 * @param {string} source - Complete ABNF grammar text.
 * @param {string} ruleName - Name of the rule to inline.
 * @returns {string} The grammar with the rule removed and its uses inlined.
 * @throws {Error} If no `ruleName = %x..` definition exists in `source`.
 */
function inlineHexRuleAsLiteral(source, ruleName) {
  const escapedRuleName = escapeRegExp(ruleName);
  const ruleRegex = new RegExp(
    `^\\s*${escapedRuleName}\\s*=\\s*(%x[0-9A-Fa-f]+(?:\\.[0-9A-Fa-f]+)*)\\b.*$`,
    "m",
  );
  const ruleMatch = source.match(ruleRegex);
  if (!ruleMatch) {
    throw new Error(`Rule ${ruleName} was not found.`);
  }

  const hexSequence = ruleMatch[1];
  const literalChars = decodeAbnfHexSequence(hexSequence);

  // Quote only what an ABNF char-val can hold (%x20-21 / %x23-7E), minus the
  // backslash (%x5C), which is deliberately kept in hex form.
  const isQuotable = /^[\x20\x21\x23-\x5B\x5D-\x7E]+$/.test(literalChars);
  const replacement = isQuotable ? `"${literalChars}"` : hexSequence;

  // Match leading blanks with [ \t]* rather than \s* so that blank lines
  // preceding the definition are not removed together with it.
  const removeRuleRegex = new RegExp(
    `^[ \\t]*${escapedRuleName}\\s*=.*(?:\\r?\\n|$)`,
    "m",
  );
  const withoutRule = source.replace(removeRuleRegex, "");

  // Rule names may contain letters, digits and hyphens; the look-arounds keep
  // us from rewriting part of a longer name such as `asterisk-slash`.
  const useRuleRegex = new RegExp(
    `(?<![A-Za-z0-9-])${escapedRuleName}(?![A-Za-z0-9-])`,
    "g",
  );
  // A replacer function inserts the text verbatim; a replacement *string*
  // would expand `$&`, `$$`, "$`" and `$'` found in the literal.
  const insertReplacement = () => replacement;

  // Replace only grammar expressions: RHS after '=' or continuation lines.
  return withoutRule
    .split(/\r?\n/)
    .map((line) => {
      const eqIndex = line.indexOf("=");

      if (eqIndex !== -1) {
        const lhs = line.slice(0, eqIndex + 1);
        const rhs = line.slice(eqIndex + 1).replace(useRuleRegex, insertReplacement);
        return `${lhs}${rhs}`;
      }

      if (/^\s/.test(line)) {
        return line.replace(useRuleRegex, insertReplacement);
      }

      return line;
    })
    .join("\n");
}

/**
 * Apply every configured pre-processing step to an ABNF grammar.
 *
 * Each rule named in `INLINE_HEX_RULES` is inlined in list order, with the
 * output of one step feeding the next.
 *
 * @param {string} source - Original ABNF grammar text.
 * @returns {string} The grammar after all listed rules have been inlined.
 */
function processAbnfSource(source) {
  return INLINE_HEX_RULES.reduce(
    (grammar, ruleName) => inlineHexRuleAsLiteral(grammar, ruleName),
    source,
  );
}

// ---------------------------------------------------------------------------
// Command-line entry point
//
// Usage: generate-railroad.js [input.abnf] [output.html] [--title <title>]
//
// Reads the input grammar, writes a pre-processed copy, then runs the
// railroad-diagram-generator-js CLI on that copy and exits with its status.
// ---------------------------------------------------------------------------

// Print a diagnostic on stderr and terminate with exit status 1.
function exitWithError(message) {
  console.error(message);
  process.exit(1);
}

const cliArguments = process.argv.slice(2);
const titleFlagPosition = cliArguments.indexOf("--title");

// Pull `--title <value>` out of the argument list so that only the
// positional arguments remain.
let title;
if (titleFlagPosition !== -1) {
  if (titleFlagPosition + 1 >= cliArguments.length) {
    exitWithError("Missing value for --title");
  }
  [, title] = cliArguments.splice(titleFlagPosition, 2);
}

const [inputArgument, outputArgument] = cliArguments;

// Every path is resolved against the directory that contains this script.
const inputPath = path.resolve(__dirname, inputArgument || DEFAULT_INPUT_ABNF);
const outputPath = path.resolve(__dirname, outputArgument || DEFAULT_OUTPUT_HTML);
const processedPath = path.resolve(__dirname, DEFAULT_PROCESSED_ABNF);

let source;
try {
  source = fs.readFileSync(inputPath, "utf8");
} catch (error) {
  exitWithError(`Failed to read input ABNF: ${error.message}`);
}

let processed;
try {
  processed = processAbnfSource(source);
} catch (error) {
  exitWithError(`Failed to process ABNF source: ${error.message}`);
}

// Guard against a customized processAbnfSource that forgets to return text.
if (typeof processed !== "string") {
  exitWithError("Failed to process ABNF source: processAbnfSource must return a string.");
}

try {
  fs.mkdirSync(path.dirname(processedPath), { recursive: true });
  fs.writeFileSync(processedPath, processed, "utf8");
} catch (error) {
  exitWithError(`Failed to write processed ABNF: ${error.message}`);
}

const cliPath = path.join(
  __dirname,
  "node_modules",
  "railroad-diagram-generator-js",
  "bin",
  "cli.js",
);

const cliArgs = [
  cliPath,
  "generate",
  processedPath,
  outputPath,
  ...(title ? ["--title", title] : []),
];

// Run the generator with the same Node binary that runs this script,
// sharing stdio so its output reaches the user directly.
const result = spawnSync(process.execPath, cliArgs, {
  cwd: __dirname,
  stdio: "inherit",
});

if (result.error) {
  exitWithError(`Failed to run railroad generator: ${result.error.message}`);
}

// A null status means the child did not exit normally; report that as failure.
if (result.status === null) {
  process.exit(1);
}
process.exit(result.status);
73 changes: 73 additions & 0 deletions grammar/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
# JSONC Grammar directory

This directory contains the ABNF grammar for JSONC, along with plans for generating railroad diagrams from it.

## Railroad Diagram Generation Plan

Generate railroad diagrams from `grammar/jsonc.abnf` using a simple one-file Node.js script.

Instead of building a custom ABNF parser and converter to Tab Atkins constructor calls, use:

- https://github.com/xtofs/railroad-diagram-generator.js

This tool parses ABNF directly and generates static HTML with embedded SVG diagrams.

### Script behavior

The wrapper script should:

1. Accept an optional input ABNF path and an optional output HTML path.
2. Default to:
- input: `grammar/jsonc.abnf`
- output: `grammar/railroad-diagram.html`
3. Optionally accept `--title` to set the HTML title.
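4. Pre-process the grammar (inline the rules listed in `INLINE_HEX_RULES`), write the result to `grammar/jsonc-processed.abnf`, and execute the upstream CLI from our installed dependency on that processed file.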
5. Exit non-zero on error and print useful diagnostics.

### Command examples

Initialize submodule(s):

```bash
git submodule update --init --recursive
```

Install dependencies:

```bash
npm install
```

The dependency is sourced from disk via:

```json
"railroad-diagram-generator-js": "file:./submodules/railroad-diagram-generator-js"
```

Generate with defaults:

```bash
npm run railroad
```

Generate from a specific input and output:

```bash
npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html
```

Generate with a custom title:

```bash
npm run railroad -- grammar/jsonc.abnf grammar/railroad-diagram.html --title "JSONC Grammar"
```

### Notes on EOF for single-line comments

The grammar already allows single-line comments to terminate at end-of-file because the line terminator is optional:

```abnf
single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
```

So diagrams generated from this ABNF should not imply a mandatory line ending.
108 changes: 108 additions & 0 deletions grammar/jsonc.abnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
; JSONC grammar with comments support (RFC 8259 extended with JavaScript-style comments)
;
; Notes:
; - Rule names and structure follow RFC 8259 ABNF snippets.
; - DIGIT and HEXDIG are core rules from RFC 5234.
; - comments are an extension not in RFC 8259.
; - Trailing commas are NOT supported in this grammar.

; A JSONC-text is a serialized value surrounded by optional whitespace and comments.
; Comments can appear anywhere insignificant whitespace is allowed in JSON.
JSONC-text = wsc value wsc

; Whitespace with Comments: zero or more whitespace characters or comments
wsc = *(ws-char / comment)

; Single whitespace character (space, tab, line feed, carriage return)
ws-char = %x20 / %x09 / %x0A / %x0D ; space / tab / LF / CR

; Comments: single-line or multi-line
comment = single-line-comment / multi-line-comment

; Source character: any Unicode code point, as per ECMAScript.
source-character = %x00-10FFFF

; Comment terminators and sequences (based on ECMAScript line terminators)
comment-terminator = %x0A / %x0D / %x2028 / %x2029 ; LF / CR / LS / PS
comment-terminator-sequence = %x0D.0A / %x0A / %x0D / %x2028 / %x2029

; Single-line comment: starts with //, continues until line ending
; Terminator is not part of the comment body.
; Note that the single-line-comment-end is optional, allowing comments to end at the end of the file without a line terminator.
single-line-comment-start = %x2F.2F ; // double solidus
single-line-comment-end = comment-terminator-sequence
single-line-comment = single-line-comment-start *single-line-comment-char [ single-line-comment-end ]
single-line-comment-char = %x00-09 / %x0B-0C / %x0E-2027 / %x202A-10FFFF ; Any source character except comment terminators

; Multi-line comment: /* ... */
; Cannot be nested. The first */ closes the comment.
; Any source character is allowed inside, including control characters,
; except the closing delimiter sequence "*/".
multi-line-comment-start = %x2F.2A ; /* slash-asterisk
multi-line-comment-end = %x2A.2F ; */ asterisk-slash
asterisk = %x2A ; * asterisk character
multi-line-comment = multi-line-comment-start [ multi-line-comment-chars ] multi-line-comment-end
multi-line-comment-chars = not-asterisk-char [ multi-line-comment-chars ] /
asterisk [ post-asterisk-comment-chars ]
post-asterisk-comment-chars = not-forward-slash-or-asterisk-char [ multi-line-comment-chars ] /
asterisk [ post-asterisk-comment-chars ]
not-asterisk-char = %x00-29 / %x2B-10FFFF
not-forward-slash-or-asterisk-char = %x00-29 / %x2B-2E / %x30-10FFFF

; Structural characters with surrounding optional whitespace and comments
begin-array = wsc %x5B wsc ; [ left square bracket
begin-object = wsc %x7B wsc ; { left curly bracket
end-array = wsc %x5D wsc ; ] right square bracket
end-object = wsc %x7D wsc ; } right curly bracket
name-separator = wsc %x3A wsc ; : colon
value-separator = wsc %x2C wsc ; , comma

; Any JSON value
value = false / null / true / object / array / number / string

; Literal names (boolean values and null)
false = %x66.61.6C.73.65 ; false
true = %x74.72.75.65 ; true
null = %x6E.75.6C.6C ; null

; Objects
object = begin-object [ member *( value-separator member ) ] end-object
member = string name-separator value

; Arrays
array = begin-array [ value *( value-separator value ) ] end-array

; Numbers
number = [ minus ] int [ frac ] [ exp ]
decimal-point = %x2E ; .
digit1-9 = %x31-39 ; 1-9
e = %x65 / %x45 ; e E
exp = e [ minus / plus ] 1*DIGIT
frac = decimal-point 1*DIGIT
int = zero / ( digit1-9 *DIGIT )
minus = %x2D ; -
plus = %x2B ; +
zero = %x30 ; 0

; Strings
string = quotation-mark *char quotation-mark

char = unescaped /
escape (
%x22 / ; " quotation mark U+0022
%x5C / ; \ reverse solidus U+005C
%x2F / ; / solidus U+002F
%x62 / ; b backspace U+0008
%x66 / ; f form feed U+000C
%x6E / ; n line feed U+000A
%x72 / ; r carriage return U+000D
%x74 / ; t tab U+0009
%x75 4HEXDIG ; uXXXX U+XXXX
)

escape = %x5C ; \
quotation-mark = %x22 ; "

unescaped = %x20-21 / %x23-5B / %x5D-10FFFF ; Any code point except quotation mark, reverse solidus or ASCII control chars

; End of JSONC grammar (RFC 8259 extended with JavaScript-style comments).
Loading