Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tidy up "newparser" so it can be used properly #423

Merged
merged 7 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 0 additions & 46 deletions packages/phoenix/packages/newparser/parsers/terminals.js

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,31 +1,6 @@
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from './lib.js';
import { Discard, FirstMatch, None, Optional, Repeat, Sequence } from './parsers/combinators.js';
import { Literal, StringOf } from './parsers/terminals.js';

/**
 * Parser that delegates to another parser looked up by name in
 * `this.symbol_registry`.
 * NOTE(review): `symbol_registry` is not assigned in this class; presumably it
 * is attached by whatever constructs the parser — confirm against the caller.
 */
class Symbol extends Parser {
    /**
     * @param symbolName Name of the registry entry to delegate to.
     */
    _create (symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * Runs the registered parser on a fork of `stream`, committing the fork
     * only when the delegate produced a value.
     * @param stream Stream to parse from; must provide fork() and join().
     * @returns UNRECOGNIZED, an INVALID wrapper around the delegate's result,
     *          or the delegate's result tagged with `$` set to the symbol name.
     * @throws {Error} If no parser is registered under `symbolName`.
     */
    _parse (stream) {
        const parser = this.symbol_registry[this.symbolName];
        if ( ! parser ) {
            throw new Error(`No symbol defined named '${this.symbolName}'`);
        }

        const subStream = stream.fork();
        const result = parser.parse(subStream);
        // Debug logging of every parse result was removed: library code
        // should not write to the console on each symbol lookup.
        if ( result.status === UNRECOGNIZED ) {
            return UNRECOGNIZED;
        }
        if ( result.status === INVALID ) {
            return { status: INVALID, value: result };
        }

        // Only a successful parse advances the caller's stream.
        stream.join(subStream);
        result.$ = this.symbolName;
        return result;
    }
}
import { adapt_parser, VALUE } from './parser.js';
import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js';
import { Literal, None, StringOf, Symbol } from './parsers/terminals.js';

class ParserWithAction {
#parser;
Expand Down Expand Up @@ -55,6 +30,12 @@ export class GrammarContext {
return new GrammarContext({...this.parsers, ...more_parsers});
}

/**
* Construct a parsing function for the given grammar.
* @param grammar An object of symbol-names to a DSL for parsing that symbol.
* @param actions An object of symbol-names to a function run to process the symbol after it has been parsed.
* @returns {function(*, *, {must_consume_all_input?: boolean}=): *} A function to run the parser. Throws if parsing fails.
*/
define_parser (grammar, actions) {
const symbol_registry = {};
const api = {};
Expand All @@ -76,12 +57,23 @@ export class GrammarContext {
}
}

return (stream, entry_symbol) => {
return (stream, entry_symbol, { must_consume_all_input = true } = {}) => {
const entry_parser = symbol_registry[entry_symbol];
if (!entry_parser) {
throw new Error(`Entry symbol '${entry_symbol}' not found in grammar.`);
}
return entry_parser.parse(stream);
const result = entry_parser.parse(stream);

if (result.status !== VALUE) {
throw new Error('Failed to parse input against grammar.');
}

// Ensure the entire stream is consumed.
if (must_consume_all_input && !stream.is_eof()) {
throw new Error('Parsing did not consume all input.');
}

return result;
};
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ export const UNRECOGNIZED = Symbol('unrecognized');
export const INVALID = Symbol('invalid');
export const VALUE = Symbol('value');

/**
* Base class for parsers.
* To implement your own, subclass it and define these methods:
* - _create(): Acts as the constructor
* - _parse(stream): Performs the parsing on the stream, and returns either UNRECOGNIZED, INVALID, or a result object.
*/
export class Parser {
result (o) {
if (o.value && o.value.$discard) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import { INVALID, UNRECOGNIZED, VALUE, adapt_parser, Parser } from '../lib.js';
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';

/**
* Runs its child parser, and discards its result.
* @param parser Child parser
*/
export class Discard extends Parser {
_create (parser) {
this.parser = adapt_parser(parser);
Expand All @@ -19,6 +23,10 @@ export class Discard extends Parser {
}
}

/**
* Runs its child parsers in order, and returns the first successful result.
* @param parsers Child parsers
*/
export class FirstMatch extends Parser {
_create (...parsers) {
this.parsers = parsers.map(adapt_parser);
Expand All @@ -42,14 +50,10 @@ export class FirstMatch extends Parser {
}
}

/**
 * Parser that never reads from the stream and always succeeds with a
 * result flagged as discarded.
 */
export class None extends Parser {
    _create () {
        // No configuration is needed.
    }

    /**
     * @param stream Unused; nothing is read from it.
     * @returns A VALUE result tagged 'none' with `$discard` set.
     */
    _parse (stream) {
        const discarded = { status: VALUE, $: 'none', $discard: true };
        return discarded;
    }
}

/**
* Runs its child parser, and then returns its result, or nothing.
* @param parser Child parser
*/
export class Optional extends Parser {
_create (parser) {
this.parser = adapt_parser(parser);
Expand All @@ -66,6 +70,12 @@ export class Optional extends Parser {
}
}

/**
* Parses a repeated sequence of values with separators between them.
* @param value_parser Parser for the value
* @param separator_parser Parser for the separator, optional
* @param trailing Whether to allow a trailing separator
*/
export class Repeat extends Parser {
_create (value_parser, separator_parser, { trailing = false } = {}) {
this.value_parser = adapt_parser(value_parser);
Expand All @@ -75,45 +85,58 @@ export class Repeat extends Parser {

_parse (stream) {
const results = [];
for ( ;; ) {
const subStream = stream.fork();
const subStream = stream.fork();

// Value
const result = this.value_parser.parse(subStream);
if ( result.status === UNRECOGNIZED ) {
break;
}
if ( result.status === INVALID ) {
return { status: INVALID, value: result };
}
stream.join(subStream);
if ( ! result.$discard ) results.push(result);
// Parse first value
const result = this.value_parser.parse(subStream);
if ( result.status === INVALID )
return { status: INVALID, value: result };

// Separator
if ( ! this.separator_parser ) {
continue;
}
const separatorResult = this.separator_parser.parse(subStream);
if ( separatorResult.status === UNRECOGNIZED ) {
break;
}
if ( separatorResult.status === INVALID ) {
return { status: INVALID, value: separatorResult };
}
if ( result.status === VALUE ) {
stream.join(subStream);
if ( ! result.$discard ) results.push(separatorResult);

// TODO: Detect trailing separator and reject it if trailing==false
if (!result.$discard) results.push(result);

// Repeatedly parse <separator> <value>
for (;;) {
// Separator
if (!this.separator_parser)
continue;

const separatorResult = this.separator_parser.parse(subStream);
if (separatorResult.status === UNRECOGNIZED)
break;
if (separatorResult.status === INVALID)
return { status: INVALID, value: separatorResult };
stream.join(subStream);
if (!separatorResult.$discard) results.push(separatorResult);

// Value
const result = this.value_parser.parse(subStream);
if (result.status === UNRECOGNIZED) {
// If we failed to parse a value, we have a trailing separator
if (this.trailing === false)
return { status: INVALID, value: result };
break;
}
if (result.status === INVALID)
return { status: INVALID, value: result };

stream.join(subStream);
if (!result.$discard) results.push(result);
}
}

if ( results.length === 0 ) {
if ( results.length === 0 )
return UNRECOGNIZED;
}

return { status: VALUE, value: results };
}
}

/**
* Runs a sequence of child parsers, and returns their result as an array if they all succeed.
* @param parsers Child parsers
*/
export class Sequence extends Parser {
_create (...parsers) {
this.parsers = parsers.map(adapt_parser);
Expand Down
93 changes: 93 additions & 0 deletions packages/phoenix/packages/parsely/parsers/terminals.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';

/**
 * Matches an exact, fixed sequence of stream items.
 * @param value The sequence to match; it is compared index by index against
 *              successive items read from the stream.
 */
export class Literal extends Parser {
    _create (value) {
        this.value = value;
    }

    /**
     * @param stream Stream to parse from; must provide fork() and join().
     * @returns UNRECOGNIZED if the stream ends early or any item differs,
     *          otherwise a VALUE result tagged 'literal' carrying the literal.
     */
    _parse (stream) {
        // Work on a fork so a failed match leaves the caller's stream untouched.
        const attempt = stream.fork();

        let index = 0;
        while ( index < this.value.length ) {
            const step = attempt.next();
            if ( step.done || step.value !== this.value[index] ) {
                return UNRECOGNIZED;
            }
            index++;
        }

        stream.join(attempt);
        return { status: VALUE, $: 'literal', value: this.value };
    }
}

/**
 * Collects a run of consecutive stream items accepted by a predicate and
 * returns them concatenated as a string.
 * @param test Function called with each item; return truthy to include it.
 */
export class StringOf extends Parser {
    _create (test) {
        this.test = test;
    }

    /**
     * @param stream Stream to parse from; must provide fork() and join().
     * @returns UNRECOGNIZED when no item was accepted, otherwise a VALUE
     *          result tagged 'stringOf' carrying the collected text.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let collected = '';

        for (;;) {
            // Peek first so a rejected item is not consumed.
            const peeked = attempt.look();
            if ( peeked.done || ! this.test(peeked.value) ) break;

            attempt.next();
            collected += peeked.value;
        }

        if ( collected === '' ) return UNRECOGNIZED;

        stream.join(attempt);
        return { status: VALUE, $: 'stringOf', value: collected };
    }
}

/**
 * Delegates parsing to the parser registered under a symbol name in
 * `this.symbol_registry`.
 * @param symbolName The registry key of the parser to run.
 */
export class Symbol extends Parser {
    _create (symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * @param stream Stream to parse from; must provide fork() and join().
     * @returns UNRECOGNIZED, an INVALID wrapper around the delegate's result,
     *          or the delegate's result with `$` set to the symbol name.
     * @throws {Error} If no parser is registered under the symbol name.
     */
    _parse (stream) {
        const delegate = this.symbol_registry[this.symbolName];
        if ( ! delegate ) {
            throw new Error(`No symbol defined named '${this.symbolName}'`);
        }

        const attempt = stream.fork();
        const outcome = delegate.parse(attempt);

        switch ( outcome.status ) {
            case UNRECOGNIZED:
                return UNRECOGNIZED;
            case INVALID:
                return { status: INVALID, value: outcome };
            default:
                // Commit the consumed input and tag the result with our name.
                stream.join(attempt);
                outcome.$ = this.symbolName;
                return outcome;
        }
    }
}

/**
 * Parser that reads nothing from the stream and always yields a result
 * flagged as discarded.
 */
export class None extends Parser {
    _create () {
        // No configuration is needed.
    }

    /**
     * @param stream Unused; nothing is read from it.
     * @returns A VALUE result tagged 'none' with `$discard` set.
     */
    _parse (stream) {
        const discarded = { status: VALUE, $: 'none', $discard: true };
        return discarded;
    }
}
Loading