Skip to content

Commit

Permalink
render EBNF grammar in node kind enums doc comments (#545)
Browse files Browse the repository at this point in the history
  • Loading branch information
OmarTawfik committed Jul 24, 2023
1 parent 575037a commit e73658a
Show file tree
Hide file tree
Showing 34 changed files with 8,894 additions and 659 deletions.
5 changes: 5 additions & 0 deletions .changeset/sweet-bugs-report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"changelog": patch
---

render EBNF grammar on top of each `ProductionKind`, `RuleKind`, and `TokenKind`.
1 change: 1 addition & 0 deletions .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"codegen",
"devcontainer",
"doxygen",
"ebnf",
"inheritdoc",
"ipfs",
"mkdocs",
Expand Down
83 changes: 64 additions & 19 deletions crates/codegen/ebnf/src/nodes.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#[derive(Clone, Eq, Hash, PartialEq)]
pub enum EbnfNode {
BaseProduction,
Choice {
nodes: Vec<EbnfNode>,
},
Expand All @@ -27,13 +25,12 @@ pub enum EbnfNode {
Sequence {
nodes: Vec<EbnfNode>,
},
SubStatement {
name: String,
comment: Option<String>,
root_node: Box<EbnfNode>,
},
Terminal {
value: String,
terminal: String,
},
WithComment {
node: Box<EbnfNode>,
comment: String,
},
ZeroOrMore {
node: Box<EbnfNode>,
Expand All @@ -42,7 +39,16 @@ pub enum EbnfNode {

impl EbnfNode {
pub fn choice(nodes: Vec<EbnfNode>) -> Self {
Self::Choice { nodes }
let mut results = vec![];

for node in nodes {
match node {
EbnfNode::Choice { nodes } => results.extend(nodes),
_ => results.push(node),
}
}

Self::Choice { nodes: results }
}

pub fn difference(minuend: EbnfNode, subtrahend: EbnfNode) -> Self {
Expand Down Expand Up @@ -70,28 +76,40 @@ impl EbnfNode {
}
}

pub fn production_ref(name: String) -> Self {
Self::ProductionRef { name }
pub fn production_ref(name: &str) -> Self {
Self::ProductionRef {
name: name.to_owned(),
}
}

pub fn range(from: char, to: char) -> Self {
Self::Range { from, to }
}

pub fn sequence(nodes: Vec<EbnfNode>) -> Self {
Self::Sequence { nodes }
let mut results = vec![];

for node in nodes {
match node {
EbnfNode::Sequence { nodes } => results.extend(nodes),
_ => results.push(node),
}
}

Self::Sequence { nodes: results }
}

pub fn sub_statement(name: String, comment: Option<String>, root_node: EbnfNode) -> Self {
Self::SubStatement {
name,
comment,
root_node: Box::new(root_node),
pub fn terminal(terminal: &str) -> Self {
Self::Terminal {
terminal: terminal.to_owned(),
}
}

pub fn terminal(value: String) -> Self {
Self::Terminal { value }
pub fn with_comment(node: EbnfNode, comment: String) -> Self {
Self::WithComment {
node: Box::new(node),
comment,
}
}

pub fn zero_or_more(node: EbnfNode) -> Self {
Expand All @@ -100,3 +118,30 @@ impl EbnfNode {
}
}
}

impl EbnfNode {
pub fn precedence(&self) -> u8 {
// We are specifying precedence "groups" instead of a flat list.
// This separates members of the same precedence, like both "a b (c | d)" and "a | b | (c d)".
return match self {
// Not an operator
EbnfNode::WithComment { .. } => 0,

// Binary
EbnfNode::Choice { .. } | EbnfNode::Difference { .. } | EbnfNode::Sequence { .. } => 1,

// Prefix
EbnfNode::Not { .. } => 2,

// Postfix
EbnfNode::OneOrMore { .. }
| EbnfNode::Optional { .. }
| EbnfNode::ZeroOrMore { .. } => 3,

// Primary
EbnfNode::ProductionRef { .. } | EbnfNode::Range { .. } | EbnfNode::Terminal { .. } => {
4
}
};
}
}
62 changes: 23 additions & 39 deletions crates/codegen/ebnf/src/parser.rs
Original file line number Diff line number Diff line change
@@ -1,78 +1,62 @@
use codegen_schema::types::{ParserDefinition, ParserRef};

use crate::{nodes::EbnfNode, serialization::GenerateEbnf};
use crate::nodes::EbnfNode;

impl GenerateEbnf for ParserRef {
fn generate_ebnf(&self) -> EbnfNode {
return self.definition.generate_ebnf();
}
}

impl GenerateEbnf for ParserDefinition {
fn generate_ebnf(&self) -> EbnfNode {
match &self {
impl EbnfNode {
pub fn from_parser(parser: &ParserRef) -> Self {
match &parser.definition {
ParserDefinition::Choice(parsers) => {
return EbnfNode::choice(
parsers
.iter()
.map(|parser| parser.generate_ebnf())
.collect(),
);
return Self::choice(parsers.iter().map(Self::from_parser).collect());
}

ParserDefinition::DelimitedBy {
open,
parser,
close,
} => {
return EbnfNode::sequence(vec![
EbnfNode::production_ref(open.reference.to_owned()),
parser.generate_ebnf(),
EbnfNode::production_ref(close.reference.to_owned()),
return Self::sequence(vec![
Self::production_ref(&open.reference),
Self::from_parser(&parser),
Self::production_ref(&close.reference),
]);
}

ParserDefinition::OneOrMore(parser) => {
return EbnfNode::one_or_more(parser.generate_ebnf());
return Self::one_or_more(Self::from_parser(&parser));
}

ParserDefinition::Optional(parser) => {
return EbnfNode::optional(parser.generate_ebnf());
return Self::optional(Self::from_parser(&parser));
}

ParserDefinition::Reference(name) => {
return EbnfNode::production_ref(name.to_owned());
return Self::production_ref(&name);
}

ParserDefinition::SeparatedBy { parser, separator } => {
return EbnfNode::sequence(vec![
parser.generate_ebnf(),
EbnfNode::zero_or_more(EbnfNode::sequence(vec![
EbnfNode::production_ref(separator.reference.to_owned()),
parser.generate_ebnf(),
return Self::sequence(vec![
Self::from_parser(&parser),
Self::zero_or_more(Self::sequence(vec![
Self::production_ref(&separator.reference),
Self::from_parser(&parser),
])),
]);
}

ParserDefinition::Sequence(parsers) => {
return EbnfNode::sequence(
parsers
.iter()
.map(|parser| parser.generate_ebnf())
.collect(),
);
return Self::sequence(parsers.iter().map(Self::from_parser).collect());
}

ParserDefinition::TerminatedBy { parser, terminator } => {
return EbnfNode::sequence(vec![
parser.generate_ebnf(),
EbnfNode::production_ref(terminator.reference.to_owned()),
return Self::sequence(vec![
Self::from_parser(&parser),
Self::production_ref(&terminator.reference),
]);
}

ParserDefinition::ZeroOrMore(parser) => {
return EbnfNode::zero_or_more(parser.generate_ebnf());
return Self::zero_or_more(Self::from_parser(&parser));
}
}
};
}
}
104 changes: 64 additions & 40 deletions crates/codegen/ebnf/src/precedence_parser.rs
Original file line number Diff line number Diff line change
@@ -1,50 +1,74 @@
use codegen_schema::types::{OperatorModel, PrecedenceParserRef};

use crate::{nodes::EbnfNode, serialization::GenerateEbnf};

impl GenerateEbnf for PrecedenceParserRef {
fn generate_ebnf(&self) -> EbnfNode {
let mut nodes = vec![];

for expression in &self.operator_expressions {
let mut comment = None;

let operator = match expression.model {
OperatorModel::BinaryLeftAssociative => EbnfNode::sequence(vec![
EbnfNode::BaseProduction,
expression.operator.generate_ebnf(),
EbnfNode::BaseProduction,
]),

OperatorModel::BinaryRightAssociative => {
comment = Some("Right Associative".to_owned());

EbnfNode::sequence(vec![
EbnfNode::BaseProduction,
expression.operator.generate_ebnf(),
EbnfNode::BaseProduction,
])
}
OperatorModel::UnaryPrefix => EbnfNode::sequence(vec![
expression.operator.generate_ebnf(),
EbnfNode::BaseProduction,
]),

OperatorModel::UnaryPostfix => EbnfNode::sequence(vec![
EbnfNode::BaseProduction,
expression.operator.generate_ebnf(),
]),
use crate::{nodes::EbnfNode, EbnfSerializer};

impl EbnfNode {
pub fn from_precedence_parser(
precedence_parser: &PrecedenceParserRef,
base_expression: &str,
serializer: &mut EbnfSerializer,
) -> Self {
let mut choices = vec![];

for expression in &precedence_parser.operator_expressions {
let (expression_body, model_description) = match expression.model {
OperatorModel::BinaryLeftAssociative => (
Self::sequence(vec![
Self::production_ref(base_expression),
Self::from_parser(&expression.operator),
Self::production_ref(base_expression),
]),
"Binary Operator, Left Associative",
),

OperatorModel::BinaryRightAssociative => (
Self::sequence(vec![
Self::production_ref(base_expression),
Self::from_parser(&expression.operator),
Self::production_ref(base_expression),
]),
"Binary Operator, Right Associative",
),

OperatorModel::UnaryPrefix => (
Self::sequence(vec![
Self::from_parser(&expression.operator),
Self::production_ref(base_expression),
]),
"Unary Operator, Prefix",
),

OperatorModel::UnaryPostfix => (
Self::sequence(vec![
Self::production_ref(base_expression),
Self::from_parser(&expression.operator),
]),
"Unary Operator, Postfix",
),
};

let serialized_expression_body = {
let mut buffer = String::new();
serializer.serialize_node(&expression_body, &mut buffer);
buffer
};

nodes.push(EbnfNode::sub_statement(
expression.name.to_owned(),
comment,
operator,
choices.push(Self::with_comment(
Self::with_comment(
Self::production_ref(&expression.name),
serialized_expression_body,
),
model_description.to_owned(),
));

serializer.serialize_statement(
&expression.name,
&Self::with_comment(expression_body, model_description.to_owned()),
);
}

nodes.push(self.primary_expression.generate_ebnf());
choices.push(Self::from_parser(&precedence_parser.primary_expression));

return EbnfNode::choice(nodes);
return Self::choice(choices);
}
}

0 comments on commit e73658a

Please sign in to comment.