Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[draft] Add Span to our Lexer error #4463

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
9 changes: 9 additions & 0 deletions prqlc/prqlc-ast/src/span.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@ impl From<Span> for Range<usize> {
a.start..a.end
}
}
impl From<Range<usize>> for Span {
fn from(range: Range<usize>) -> Self {
Span {
start: range.start,
end: range.end,
source_id: 0, // Default value as Range<usize> does not provide a source_id
}
}
}

impl Debug for Span {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
Expand Down
6 changes: 1 addition & 5 deletions prqlc/prqlc-parser/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@ impl Display for LexerError {
}
}

pub(crate) fn convert_lexer_error(
source: &str,
e: chumsky::error::Cheap<char>,
source_id: u16,
) -> Error {
pub(crate) fn convert_lexer_error(source: &str, e: LError, source_id: u16) -> Error {
// TODO: is there a neater way of taking a span? We want to take it based on
// the chars, not the bytes, so can't just index into the str.
let found = source
Expand Down
19 changes: 14 additions & 5 deletions prqlc/prqlc-parser/src/interpolation.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use chumsky::{error::Cheap, prelude::*};
use chumsky::prelude::*;
use itertools::Itertools;
use prqlc_ast::expr::*;

Expand Down Expand Up @@ -97,7 +97,7 @@ fn parse_interpolate() {
"###);
}

fn parser(span_base: ParserSpan) -> impl Parser<char, Vec<InterpolateItem>, Error = Cheap<char>> {
fn parser(span_base: ParserSpan) -> impl Parser<char, Vec<InterpolateItem>, Error = LError> {
let expr = ident_part()
.map_with_span(move |name, s| (name, offset_span(span_base, s)))
.separated_by(just('.'))
Expand Down Expand Up @@ -134,10 +134,19 @@ fn parser(span_base: ParserSpan) -> impl Parser<char, Vec<InterpolateItem>, Erro
expr.or(string).repeated().then_ignore(end())
}

fn offset_span(base: ParserSpan, range: std::ops::Range<usize>) -> ParserSpan {
// fn offset_span(base: ParserSpan, range: std::ops::Range<usize>) -> ParserSpan {
// ParserSpan(Span {
// start: base.0.start + range.start,
// end: base.0.start + range.end,
// source_id: base.0.source_id,
// })
// }

fn offset_span(base: ParserSpan, range: ParserSpan) -> ParserSpan {
// base + range
ParserSpan(Span {
start: base.0.start + range.start,
end: base.0.start + range.end,
start: base.0.start + range.0.start,
end: base.0.start + range.0.end,
source_id: base.0.source_id,
})
}
40 changes: 21 additions & 19 deletions prqlc/prqlc-parser/src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
use chumsky::{
error::Cheap,
prelude::*,
text::{newline, Character},
};
Expand All @@ -10,7 +9,8 @@ use serde::{Deserialize, Serialize};
#[derive(Clone, PartialEq, Serialize, Deserialize)]
pub struct Token {
pub kind: TokenKind,
pub span: std::ops::Range<usize>,
// pub span: Span,
pub span: crate::span::ParserSpan,
}

#[derive(Clone, PartialEq, Debug, Serialize, Deserialize)]
Expand Down Expand Up @@ -66,16 +66,18 @@ pub enum TokenKind {
LineWrap(Vec<TokenKind>),
}

/// Error type used by the lexer's parsers: `Simple` over `char` input, with
/// error locations expressed as `crate::span::ParserSpan`.
pub type LError = Simple<char, crate::span::ParserSpan>;

/// Lex chars to tokens until the end of the input
pub fn lexer() -> impl Parser<char, Vec<Token>, Error = Cheap<char>> {
pub fn lexer() -> impl Parser<char, Vec<Token>, Error = LError> {
lex_token()
.repeated()
.then_ignore(ignored())
.then_ignore(end())
}

/// Lex chars to a single token
pub fn lex_token() -> impl Parser<char, Token, Error = Cheap<char>> {
pub fn lex_token() -> impl Parser<char, Token, Error = LError> {
let control_multi = choice((
just("->").to(TokenKind::ArrowThin),
just("=>").to(TokenKind::ArrowFat),
Expand Down Expand Up @@ -155,18 +157,18 @@ pub fn lex_token() -> impl Parser<char, Token, Error = Cheap<char>> {
))
}

fn ignored() -> impl Parser<char, (), Error = Cheap<char>> {
fn ignored() -> impl Parser<char, (), Error = LError> {
whitespace().repeated().ignored()
}

fn whitespace() -> impl Parser<char, (), Error = Cheap<char>> {
fn whitespace() -> impl Parser<char, (), Error = LError> {
filter(|x: &char| x.is_inline_whitespace())
.repeated()
.at_least(1)
.ignored()
}

fn line_wrap() -> impl Parser<char, TokenKind, Error = Cheap<char>> {
fn line_wrap() -> impl Parser<char, TokenKind, Error = LError> {
newline()
.ignore_then(
whitespace()
Expand All @@ -180,7 +182,7 @@ fn line_wrap() -> impl Parser<char, TokenKind, Error = Cheap<char>> {
.map(TokenKind::LineWrap)
}

fn comment() -> impl Parser<char, TokenKind, Error = Cheap<char>> {
fn comment() -> impl Parser<char, TokenKind, Error = LError> {
just('#').ignore_then(choice((
just('!').ignore_then(
newline()
Expand All @@ -197,7 +199,7 @@ fn comment() -> impl Parser<char, TokenKind, Error = Cheap<char>> {
)))
}

pub fn ident_part() -> impl Parser<char, String, Error = Cheap<char>> + Clone {
pub fn ident_part() -> impl Parser<char, String, Error = LError> + Clone {
let plain = filter(|c: &char| c.is_alphabetic() || *c == '_')
.chain(filter(|c: &char| c.is_alphanumeric() || *c == '_').repeated());

Expand All @@ -206,7 +208,7 @@ pub fn ident_part() -> impl Parser<char, String, Error = Cheap<char>> + Clone {
plain.or(backticks).collect()
}

fn literal() -> impl Parser<char, Literal, Error = Cheap<char>> {
fn literal() -> impl Parser<char, Literal, Error = LError> {
let binary_notation = just("0b")
.then_ignore(just("_").or_not())
.ignore_then(
Expand Down Expand Up @@ -270,7 +272,7 @@ fn literal() -> impl Parser<char, Literal, Error = Cheap<char>> {
} else if let Ok(f) = str.parse::<f64>() {
Ok(Literal::Float(f))
} else {
Err(Cheap::expected_input_found(span, None, None))
Err(LError::expected_input_found(span, None, None))
}
})
.labelled("number");
Expand Down Expand Up @@ -307,7 +309,7 @@ fn literal() -> impl Parser<char, Literal, Error = Cheap<char>> {
let unit = unit.to_string();
Ok(ValueAndUnit { n, unit })
} else {
Err(Cheap::expected_input_found(span, None, None))
Err(LError::expected_input_found(span, None, None))
}
})
.map(Literal::ValueAndUnit);
Expand Down Expand Up @@ -391,7 +393,7 @@ fn literal() -> impl Parser<char, Literal, Error = Cheap<char>> {
))
}

fn quoted_string(escaped: bool) -> impl Parser<char, String, Error = Cheap<char>> {
fn quoted_string(escaped: bool) -> impl Parser<char, String, Error = LError> {
choice((
quoted_string_of_quote(&'"', escaped),
quoted_string_of_quote(&'\'', escaped),
Expand All @@ -403,7 +405,7 @@ fn quoted_string(escaped: bool) -> impl Parser<char, String, Error = Cheap<char>
fn quoted_string_of_quote(
quote: &char,
escaping: bool,
) -> impl Parser<char, Vec<char>, Error = Cheap<char>> + '_ {
) -> impl Parser<char, Vec<char>, Error = LError> + '_ {
let opening = just(*quote).repeated().at_least(1);

opening.then_with(move |opening| {
Expand Down Expand Up @@ -431,7 +433,7 @@ fn quoted_string_of_quote(
})
}

fn escaped_character() -> impl Parser<char, char, Error = Cheap<char>> {
fn escaped_character() -> impl Parser<char, char, Error = LError> {
just('\\').ignore_then(choice((
just('\\'),
just('/'),
Expand All @@ -448,7 +450,7 @@ fn escaped_character() -> impl Parser<char, char, Error = Cheap<char>> {
.collect::<String>()
.validate(|digits, span, emit| {
char::from_u32(u32::from_str_radix(&digits, 16).unwrap()).unwrap_or_else(|| {
emit(Cheap::expected_input_found(span, None, None));
emit(LError::expected_input_found(span, None, None));
'\u{FFFD}' // Unicode replacement character
})
})
Expand All @@ -461,21 +463,21 @@ fn escaped_character() -> impl Parser<char, char, Error = Cheap<char>> {
.collect::<String>()
.validate(|digits, span, emit| {
char::from_u32(u32::from_str_radix(&digits, 16).unwrap()).unwrap_or_else(|| {
emit(Cheap::expected_input_found(span, None, None));
emit(LError::expected_input_found(span, None, None));
'\u{FFFD}'
})
}),
)),
)))
}

fn digits(count: usize) -> impl Parser<char, Vec<char>, Error = Cheap<char>> {
fn digits(count: usize) -> impl Parser<char, Vec<char>, Error = LError> {
filter(|c: &char| c.is_ascii_digit())
.repeated()
.exactly(count)
}

fn end_expr() -> impl Parser<char, (), Error = Cheap<char>> {
fn end_expr() -> impl Parser<char, (), Error = LError> {
choice((
end(),
one_of(",)]}\t >").ignored(),
Expand Down
4 changes: 2 additions & 2 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ fn prepare_stream(
) -> Stream<TokenKind, ParserSpan, impl Iterator<Item = (TokenKind, ParserSpan)> + Sized> {
let tokens = tokens
.into_iter()
.map(move |token| (token.kind, ParserSpan::new(source_id, token.span)));
.map(move |token| (token.kind, ParserSpan::new(source_id, token.span.into())));
let len = source.chars().count();
let eoi = ParserSpan(Span {
start: len,
Expand All @@ -148,7 +148,7 @@ fn prepare_stream(
Stream::from_iter(eoi, tokens)
}

fn convert_lexer_error(source: &str, e: chumsky::error::Cheap<char>, source_id: u16) -> Error {
fn convert_lexer_error(source: &str, e: lexer::LError, source_id: u16) -> Error {
// We want to slice based on the chars, not the bytes, so can't just index
// into the str.
let found = source
Expand Down
16 changes: 15 additions & 1 deletion prqlc/prqlc-parser/src/span.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,24 @@
use std::ops::{Add, Deref, DerefMut, Sub};

use serde::{Deserialize, Serialize};

use crate::Span;

#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
pub struct ParserSpan(pub crate::Span);

// impl From<ParserSpan> for Span {
// fn from(span: ParserSpan) -> Self {
// span.0
// }
// }

// impl From<ParserSpan> for std::ops::Range<usize> {
// fn from(value: ParserSpan) -> Self {
// value.0.into()
// }
// }

impl Deref for ParserSpan {
type Target = Span;

Expand Down