Skip to content

Commit

Permalink
Never run parser if there are lexer errors (#179)
Browse files Browse the repository at this point in the history
The OQ3 parser does
 lexer -> parser (to ast) -> semantic analysis

We want to impose a couple of invariants that were not in place in the r-a code:
1. If there are any lexing errors, parsing is not performed
2. If there are any parsing (to ast) errors, then semantic analysis is not performed.

This commit implements item number 1.

We follow r-a in pushing lexing and parsing errors onto a single vector. However, because
of rule 1, the errors in this vector will be either all lexer errors or all parser
errors. The semantic analysis code receives a structure representing a parsed file
(actually a structure including parsed included files) and errors. If there are any
errors, semantic analysis is aborted. At the level of the semantic analysis, there is no
need to check whether the errors returned are from lexing or parsing (and therefore
whether there is any parse tree at all).

The immediate impetus for this PR is that pragma statements in which "#pragma" is
misspelled are processed as tokens `InvalidIdent` and a lexing error is recorded.
Previously, lexing and parsing errors were lumped together and parsing always
proceeded. This would result in spurious parser errors being added to the lexer errors. Rather
than changing the parser (to ast) to recover from lexer errors, we don't allow the parser
to see lexer errors. If the parser detects a lexer error, it should throw an error.
  • Loading branch information
jlapeyre committed Mar 19, 2024
1 parent b5702e0 commit 9705949
Show file tree
Hide file tree
Showing 8 changed files with 145 additions and 29 deletions.
1 change: 1 addition & 0 deletions crates/oq3_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ impl Cursor<'_> {
}
}
}
// Only `#pragma` and `#dim` may begin with a pound character
InvalidIdent
}

Expand Down
6 changes: 5 additions & 1 deletion crates/oq3_parser/src/lexed_str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ impl<'a> LexedStr<'a> {
.map(|it| (it.token as usize, it.msg.as_str()))
}

/// Number of lexing errors recorded for this `LexedStr`.
/// Used to decide whether parsing should proceed at all: if this is
/// nonzero, the parser is never run (see `parse_text_check_lex`).
pub fn errors_len(&self) -> usize {
    self.error.len()
}

fn push(&mut self, kind: SyntaxKind, offset: usize) {
self.kind.push(kind);
self.start.push(offset as u32);
Expand Down Expand Up @@ -233,7 +237,7 @@ fn inner_extend_token<'a>(
}

oq3_lexer::TokenKind::InvalidIdent => {
err = "Ident contains invalid characters";
err = "Identifier contains invalid characters";
IDENT
}

Expand Down
18 changes: 11 additions & 7 deletions crates/oq3_semantics/examples/semdemo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,18 +120,22 @@ fn main() {

Some(Commands::Parse { file_name }) => {
let parsed_source = oq3_source_file::parse_source_file(file_name, None::<&[PathBuf]>);
let parse_tree = parsed_source.syntax_ast().tree();
println!(
"Found {} stmts",
parse_tree.statements().collect::<Vec<_>>().len()
);
let syntax_errors = parsed_source.syntax_ast().errors();
let ast = parsed_source.syntax_ast();
let num_stmts = if ast.have_parse() {
ast.tree().statements().count()
} else {
0
};
println!("Found {num_stmts} stmts");
let syntax_errors = ast.errors();
println!(
"Found {} parse errors:\n{:?}\n",
syntax_errors.len(),
syntax_errors
);
print_tree(parse_tree);
if ast.have_parse() {
print_tree(ast.tree());
}
}

Some(Commands::ParseGreen { file_name }) => {
Expand Down
4 changes: 2 additions & 2 deletions crates/oq3_semantics/src/syntax_to_semantics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ where
T: AsRef<str>,
P: AsRef<Path>,
{
let parsed_source =
let parsed_source: SourceString =
oq3_source_file::parse_source_string(source, fake_file_path, search_path_list);
analyze_source(parsed_source)
}
Expand All @@ -110,7 +110,7 @@ where
T: AsRef<Path>,
P: AsRef<Path>,
{
let parsed_source = oq3_source_file::parse_source_file(file_path, search_path_list);
let parsed_source: SourceFile = oq3_source_file::parse_source_file(file_path, search_path_list);
analyze_source(parsed_source)
}

Expand Down
20 changes: 12 additions & 8 deletions crates/oq3_source_file/src/source_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,29 @@

use crate::api::{inner_print_compiler_errors, parse_source_file, print_compiler_errors};
use oq3_syntax::ast as synast; // Syntactic AST
use oq3_syntax::Parse;
use oq3_syntax::ParseOrErrors;
use oq3_syntax::TextRange;
use std::env;
use std::fs;
use std::path::{Path, PathBuf};

// `SourceFile` works with the source as a string; knowledge of any on-disk
// file is not used by `synast::SourceFile`.
pub(crate) type ParsedSource = ParseOrErrors<synast::SourceFile>;

/// Lex and (if lexing succeeded) parse `source`, then parse any included
/// files, searching for them in `search_path_list`.
///
/// If there were lexer errors, `parse_check_lex` performs no parsing, so we
/// cannot scan for include statements; in that case the vector of included
/// files is empty.
pub(crate) fn parse_source_and_includes<P: AsRef<Path>>(
    source: &str,
    search_path_list: Option<&[P]>,
) -> (ParsedSource, Vec<SourceFile>) {
    let parsed_source = synast::SourceFile::parse_check_lex(source);
    let included = if parsed_source.have_parse() {
        parse_included_files(&parsed_source, search_path_list)
    } else {
        // No parse tree: nothing to scan for includes.
        Vec::new()
    };
    (parsed_source, included)
}

pub trait ErrorTrait {
fn message(&self) -> String;
fn range(&self) -> TextRange;
Expand Down
4 changes: 1 addition & 3 deletions crates/oq3_syntax/src/ast/make.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ use crate::{ast, AstNode, SourceFile, SyntaxKind, SyntaxToken}; // utils::is_raw
/// module defines shortcuts for common things.
///
/// It's named `ext` rather than `shortcuts` just to keep it short.
pub mod ext {
// GJL. This is intended to be used for semantic analysis, I think.
}
pub mod ext {}

pub fn expr_loop(block: ast::BlockExpr) -> ast::Expr {
expr_from_text(&format!("loop {block}"))
Expand Down
97 changes: 89 additions & 8 deletions crates/oq3_syntax/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,14 @@ impl<T> Parse<T> {
}

impl<T: AstNode> Parse<T> {
// Erase the typed AST parameter, keeping the green tree and errors.
pub fn to_syntax(self) -> Parse<SyntaxNode> {
    Parse {
        green: self.green,
        errors: self.errors,
        _ty: PhantomData,
    }
}
// FIXME: This is apparently not used anywhere. NOTE(review): the active copy
// above duplicates this commented-out copy; one of the two should be removed
// once the "unused" claim is confirmed.
// pub fn to_syntax(self) -> Parse<SyntaxNode> {
//     Parse {
//         green: self.green,
//         errors: self.errors,
//         _ty: PhantomData,
//     }
// }

pub fn tree(&self) -> T {
T::cast(self.syntax_node()).unwrap()
Expand Down Expand Up @@ -163,7 +164,70 @@ impl Parse<SourceFile> {
}
}

/// `SourceFile` represents a parse tree for a single Rust file.
// We have preserved `Parse<T>` above, which is needed at least by make.rs and
// a few tests (some of which trigger lexer errors, some of which do not). It
// is probably best eventually to consolidate `Parse` and `ParseOrErrors`.
// We inherited the treatment from r-a, which always proceeds from lexing to
// parsing and makes no distinction between lexer and parser errors.
// For OQ3 it is convenient to demand instead that:
//   * the parser only runs if there were no lexer errors;
//   * the semantic analysis only runs if there were no parser errors.
// Corollaries of these requirements:
//   * an improperly lexed stream (say, one containing tokens deemed
//     "illegal") being ingested by the parser implies a bug outside the
//     parser;
//   * a syntactically incorrect AST being ingested by the semantic analyzer
//     implies a bug outside the semantic analyzer.
/// Same as `Parse<T>` except that the `GreenNode` is wrapped in `Option`.
/// The `Option` is `None` if lexer errors were recorded, in which case no
/// parsing was done and all recorded errors are lexer errors.
/// If there are no lexer errors, parsing was done and there is a `GreenNode`;
/// in that case any recorded errors are parser errors.
#[derive(Debug, PartialEq, Eq)]
pub struct ParseOrErrors<T> {
    green: Option<GreenNode>,
    errors: Arc<Vec<SyntaxError>>,
    // `fn() -> T` keeps `ParseOrErrors` covariant in `T` without owning a `T`.
    _ty: PhantomData<fn() -> T>,
}

impl<T> Clone for ParseOrErrors<T> {
fn clone(&self) -> ParseOrErrors<T> {
ParseOrErrors {
green: self.green.clone(),
errors: self.errors.clone(),
_ty: PhantomData,
}
}
}

impl<T> ParseOrErrors<T> {
    /// Root node of the syntax tree produced by the parse.
    ///
    /// # Panics
    ///
    /// Panics if no parse was performed because lexer errors were recorded;
    /// callers must check `have_parse()` first.
    pub fn syntax_node(&self) -> SyntaxNode {
        SyntaxNode::new_root(
            self.green
                .clone()
                .expect("syntax_node() called, but no parse tree exists (lexer errors were recorded)"),
        )
    }

    /// All recorded errors: either all lexer errors (no parse was done)
    /// or all parser errors (a parse was done).
    pub fn errors(&self) -> &[SyntaxError] {
        &self.errors
    }

    /// `true` if lexing succeeded and a parse was therefore performed,
    /// i.e. a green tree is present.
    pub fn have_parse(&self) -> bool {
        self.green.is_some()
    }
}

impl<T: AstNode> ParseOrErrors<T> {
    /// Cast the root syntax node to the typed AST node `T`.
    ///
    /// Panics if there is no parse tree (see `syntax_node`) or if the root
    /// is not a `T`; check `have_parse()` before calling.
    pub fn tree(&self) -> T {
        let root = self.syntax_node();
        T::cast(root).unwrap()
    }

    // An `ok()` accessor analogous to `Parse::ok` could live here, but it is
    // not currently needed.
}

/// `SourceFile` represents a parse tree for a single OQ3 file.
pub use crate::ast::SourceFile;

impl SourceFile {
Expand All @@ -178,6 +242,23 @@ impl SourceFile {
_ty: PhantomData,
}
}

// Lex `text` and, only if lexing produced no errors, parse it.
// Returns a `ParseOrErrors<SourceFile>`:
//   * if there are lexer errors, its green node is `None` (no parsing is
//     attempted) and its errors are exactly the lexer errors;
//   * if there are no lexer errors, its green node is `Some(..)` and its
//     errors are any parser and validation errors.
pub fn parse_check_lex(text: &str) -> ParseOrErrors<SourceFile> {
    let (green_maybe, mut errors) = parsing::parse_text_check_lex(text);
    if let Some(ref green) = green_maybe {
        let root = SyntaxNode::new_root(green.clone());
        errors.extend(validation::validate(&root));
        // The top-level entry point must produce a SOURCE_FILE node.
        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
    }
    ParseOrErrors {
        green: green_maybe,
        errors: Arc::new(errors),
        _ty: PhantomData,
    }
}
}

/// Matches a `SyntaxNode` against an `ast` type.
Expand Down
24 changes: 24 additions & 0 deletions crates/oq3_syntax/src/parsing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,30 @@ pub fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
(node, errors)
}

/// Tokenize `text` and parse it only when the lexer reported no errors.
///
/// On lexer errors, returns `(None, errors)` where `errors` contains only
/// the lexer errors; otherwise returns `(Some(green_tree), parser_errors)`.
pub fn parse_text_check_lex(text: &str) -> (Option<GreenNode>, Vec<SyntaxError>) {
    let lexed = oq3_parser::LexedStr::new(text);
    if lexed.errors_len() != 0 {
        // Lexing failed: report the lexer errors and skip parsing entirely.
        return (None, just_errors(lexed));
    }
    let input = lexed.to_input();
    let output = oq3_parser::TopEntryPoint::SourceFile.parse(&input);
    let (green, errors, _eof) = build_tree(lexed, output);
    (Some(green), errors)
}

/// Convert the lexer errors recorded in `lexed` into `SyntaxError`s,
/// translating each token index into a text range.
fn just_errors(lexed: oq3_parser::LexedStr<'_>) -> Vec<SyntaxError> {
    lexed
        .errors()
        .map(|(i, err)| {
            let range = lexed.text_range(i);
            let range = TextRange::new(
                range.start.try_into().unwrap(),
                range.end.try_into().unwrap(),
            );
            SyntaxError::new(err, range)
        })
        .collect()
}

pub(crate) fn build_tree(
lexed: oq3_parser::LexedStr<'_>,
parser_output: oq3_parser::Output,
Expand Down

0 comments on commit 9705949

Please sign in to comment.