Skip to content

Commit 4e957d3

Browse files
committed
Always wrap in SoftKeywordTransformer
1 parent 454ec3e commit 4e957d3

File tree

4 files changed

+43
-28
lines changed

4 files changed

+43
-28
lines changed

compiler/parser/src/lexer.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,11 @@
1313
//!
1414
//! ```
1515
//! use rustpython_parser::lexer::{make_tokenizer, Tok};
16+
//! use rustpython_parser::mode::Mode;
1617
//! use rustpython_parser::token::StringKind;
1718
//!
1819
//! let source = "x = 'RustPython'";
19-
//! let tokens = make_tokenizer(source)
20+
//! let tokens = make_tokenizer(source, Mode::Module)
2021
//! .map(|tok| tok.expect("Failed to lex"))
2122
//! .collect::<Vec<_>>();
2223
//!
@@ -35,6 +36,8 @@
3536
pub use super::token::{StringKind, Tok};
3637
use crate::ast::Location;
3738
use crate::error::{LexicalError, LexicalErrorType};
39+
use crate::mode::Mode;
40+
use crate::soft_keywords::SoftKeywordTransformer;
3841
use num_bigint::BigInt;
3942
use num_traits::identities::Zero;
4043
use num_traits::Num;
@@ -197,27 +200,29 @@ pub type LexResult = Result<Spanned, LexicalError>;
197200
/// # Examples
198201
///
199202
/// ```
203+
/// use rustpython_parser::mode::Mode;
200204
/// use rustpython_parser::lexer::{make_tokenizer};
201205
///
202206
/// let source = "def hello(): return 'world'";
203-
/// let tokenizer = make_tokenizer(source);
207+
/// let tokenizer = make_tokenizer(source, Mode::Module);
204208
///
205209
/// for token in tokenizer {
206210
/// println!("{:?}", token);
207211
/// }
208212
/// ```
209213
#[inline]
210-
pub fn make_tokenizer(source: &str) -> impl Iterator<Item = LexResult> + '_ {
211-
make_tokenizer_located(source, Location::default())
214+
pub fn make_tokenizer(source: &str, mode: Mode) -> impl Iterator<Item = LexResult> + '_ {
215+
make_tokenizer_located(source, mode, Location::default())
212216
}
213217

214218
/// Create a new tokenizer from a source string, starting at a given location.
215219
/// You probably want to use [`make_tokenizer`] instead.
216220
pub fn make_tokenizer_located(
217221
source: &str,
222+
mode: Mode,
218223
start_location: Location,
219224
) -> impl Iterator<Item = LexResult> + '_ {
220-
Lexer::new(source.chars(), start_location)
225+
SoftKeywordTransformer::new(Lexer::new(source.chars(), start_location), mode)
221226
}
222227

223228
impl<T> Lexer<T>
@@ -1210,14 +1215,15 @@ where
12101215
#[cfg(test)]
12111216
mod tests {
12121217
use super::{make_tokenizer, StringKind, Tok};
1218+
use crate::mode::Mode;
12131219
use num_bigint::BigInt;
12141220

12151221
const WINDOWS_EOL: &str = "\r\n";
12161222
const MAC_EOL: &str = "\r";
12171223
const UNIX_EOL: &str = "\n";
12181224

12191225
pub fn lex_source(source: &str) -> Vec<Tok> {
1220-
let lexer = make_tokenizer(source);
1226+
let lexer = make_tokenizer(source, Mode::Module);
12211227
lexer.map(|x| x.unwrap().1).collect()
12221228
}
12231229

compiler/parser/src/lib.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,27 +66,29 @@
6666
//! For example, to get a stream of tokens from a given string, one could do this:
6767
//!
6868
//! ```
69+
//! use rustpython_parser::mode::Mode;
6970
//! use rustpython_parser::lexer::make_tokenizer;
7071
//!
7172
//! let python_source = r#"
7273
//! def is_odd(i):
7374
//! return bool(i & 1)
7475
//! "#;
75-
//! let mut tokens = make_tokenizer(python_source);
76+
//! let mut tokens = make_tokenizer(python_source, Mode::Module);
7677
//! assert!(tokens.all(|t| t.is_ok()));
7778
//! ```
7879
//!
7980
//! These tokens can be directly fed into the parser to generate an AST:
8081
//!
8182
//! ```
82-
//! use rustpython_parser::parser::{parse_tokens, Mode};
8383
//! use rustpython_parser::lexer::make_tokenizer;
84+
//! use rustpython_parser::mode::Mode;
85+
//! use rustpython_parser::parser::parse_tokens;
8486
//!
8587
//! let python_source = r#"
8688
//! def is_odd(i):
8789
//! return bool(i & 1)
8890
//! "#;
89-
//! let tokens = make_tokenizer(python_source);
91+
//! let tokens = make_tokenizer(python_source, Mode::Module);
9092
//! let ast = parse_tokens(tokens, Mode::Module, "<embedded>");
9193
//!
9294
//! assert!(ast.is_ok());
@@ -131,5 +133,5 @@ mod string;
131133
#[rustfmt::skip]
132134
mod python;
133135
mod context;
134-
pub mod soft_keywords;
136+
mod soft_keywords;
135137
pub mod token;

compiler/parser/src/parser.rs

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
1515
use crate::lexer::{LexResult, Tok};
1616
pub use crate::mode::Mode;
17-
use crate::soft_keywords::SoftKeywordTransformer;
1817
use crate::{ast, error::ParseError, lexer, python};
1918
use ast::Location;
2019
use itertools::Itertools;
@@ -107,7 +106,8 @@ pub fn parse_expression_located(
107106
/// parsing:
108107
///
109108
/// ```
110-
/// use rustpython_parser::parser::{parse, Mode};
109+
/// use rustpython_parser::mode::Mode;
110+
/// use rustpython_parser::parser::parse;
111111
///
112112
/// let expr = parse("1 + 2", Mode::Expression, "<embedded>");
113113
/// assert!(expr.is_ok());
@@ -116,7 +116,8 @@ pub fn parse_expression_located(
116116
/// Alternatively, we can parse a full Python program consisting of multiple lines:
117117
///
118118
/// ```
119-
/// use rustpython_parser::parser::{parse, Mode};
119+
/// use rustpython_parser::mode::Mode;
120+
/// use rustpython_parser::parser::parse;
120121
///
121122
/// let source = r#"
122123
/// class Greeter:
@@ -139,8 +140,9 @@ pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, Pa
139140
/// # Example
140141
///
141142
/// ```
142-
/// use rustpython_parser::parser::{parse_located, Mode};
143143
/// use rustpython_parser::ast::Location;
144+
/// use rustpython_parser::mode::Mode;
145+
/// use rustpython_parser::parser::parse_located;
144146
///
145147
/// let source = r#"
146148
/// def fib(i):
@@ -160,7 +162,7 @@ pub fn parse_located(
160162
source_path: &str,
161163
location: Location,
162164
) -> Result<ast::Mod, ParseError> {
163-
let lxr = lexer::make_tokenizer_located(source, location);
165+
let lxr = lexer::make_tokenizer_located(source, mode, location);
164166
parse_tokens(lxr, mode, source_path)
165167
}
166168

@@ -174,10 +176,11 @@ pub fn parse_located(
174176
/// them using the [`lexer::make_tokenizer`] function:
175177
///
176178
/// ```
177-
/// use rustpython_parser::parser::{parse_tokens, Mode};
178179
/// use rustpython_parser::lexer::make_tokenizer;
180+
/// use rustpython_parser::mode::Mode;
181+
/// use rustpython_parser::parser::parse_tokens;
179182
///
180-
/// let expr = parse_tokens(make_tokenizer("1 + 2"), Mode::Expression, "<embedded>");
183+
/// let expr = parse_tokens(make_tokenizer("1 + 2", Mode::Expression), Mode::Expression, "<embedded>");
181184
/// assert!(expr.is_ok());
182185
/// ```
183186
pub fn parse_tokens(
@@ -190,7 +193,7 @@ pub fn parse_tokens(
190193
.chain(lxr)
191194
.filter_ok(|(_, tok, _)| !matches!(tok, Tok::Comment { .. } | Tok::NonLogicalNewline));
192195
python::TopParser::new()
193-
.parse(SoftKeywordTransformer::new(tokenizer, mode).into_iter())
196+
.parse(tokenizer.into_iter())
194197
.map_err(|e| crate::error::parse_error_from_lalrpop(e, source_path))
195198
}
196199

compiler/parser/src/soft_keywords.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ pub struct SoftKeywordTransformer<I>
1919
where
2020
I: Iterator<Item = LexResult>,
2121
{
22-
pub underlying: MultiPeek<I>,
23-
pub start_of_line: bool,
22+
underlying: MultiPeek<I>,
23+
start_of_line: bool,
2424
}
2525

2626
impl<I> SoftKeywordTransformer<I>
@@ -84,14 +84,18 @@ where
8484

8585
self.start_of_line = next.as_ref().map_or(false, |lex_result| {
8686
lex_result.as_ref().map_or(false, |(_, tok, _)| {
87-
matches!(
88-
tok,
89-
Tok::StartModule
90-
| Tok::StartInteractive
91-
| Tok::Newline
92-
| Tok::Indent
93-
| Tok::Dedent
94-
)
87+
if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) {
88+
self.start_of_line
89+
} else {
90+
matches!(
91+
tok,
92+
Tok::StartModule
93+
| Tok::StartInteractive
94+
| Tok::Newline
95+
| Tok::Indent
96+
| Tok::Dedent
97+
)
98+
}
9599
})
96100
});
97101

0 commit comments

Comments (0)