Skip to content

Commit

Permalink
Auto merge of #63709 - matklad:decomposed-tokens, r=petrochenkov
Browse files Browse the repository at this point in the history
Move token gluing to token stream parsing

Work towards #63689: this moves token gluing from the lexer to the token tree layer. This is only a minimal step, but I like the negative diff here.

r? @petrochenkov
  • Loading branch information
bors committed Aug 20, 2019
2 parents 7858dc2 + 914e1f4 commit 1489095
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 209 deletions.
176 changes: 14 additions & 162 deletions src/librustc_lexer/src/lib.rs
Expand Up @@ -23,9 +23,6 @@ pub enum TokenKind {
Lifetime { starts_with_number: bool },
Semi,
Comma,
DotDotDot,
DotDotEq,
DotDot,
Dot,
OpenParen,
CloseParen,
Expand All @@ -37,41 +34,19 @@ pub enum TokenKind {
Pound,
Tilde,
Question,
ColonColon,
Colon,
Dollar,
EqEq,
Eq,
FatArrow,
Ne,
Not,
Le,
LArrow,
Lt,
ShlEq,
Shl,
Ge,
Gt,
ShrEq,
Shr,
RArrow,
Minus,
MinusEq,
And,
AndAnd,
AndEq,
Or,
OrOr,
OrEq,
PlusEq,
Plus,
StarEq,
Star,
SlashEq,
Slash,
CaretEq,
Caret,
PercentEq,
Percent,
Unknown,
}
Expand Down Expand Up @@ -135,13 +110,7 @@ impl Cursor<'_> {
'/' => match self.nth_char(0) {
'/' => self.line_comment(),
'*' => self.block_comment(),
_ => {
if self.eat_assign() {
SlashEq
} else {
Slash
}
}
_ => Slash,
},
c if character_properties::is_whitespace(c) => self.whitespace(),
'r' => match (self.nth_char(0), self.nth_char(1)) {
Expand Down Expand Up @@ -199,22 +168,7 @@ impl Cursor<'_> {
}
';' => Semi,
',' => Comma,
'.' => {
if self.nth_char(0) == '.' {
self.bump();
if self.nth_char(0) == '.' {
self.bump();
DotDotDot
} else if self.nth_char(0) == '=' {
self.bump();
DotDotEq
} else {
DotDot
}
} else {
Dot
}
}
'.' => Dot,
'(' => OpenParen,
')' => CloseParen,
'{' => OpenBrace,
Expand All @@ -225,112 +179,19 @@ impl Cursor<'_> {
'#' => Pound,
'~' => Tilde,
'?' => Question,
':' => {
if self.nth_char(0) == ':' {
self.bump();
ColonColon
} else {
Colon
}
}
':' => Colon,
'$' => Dollar,
'=' => {
if self.nth_char(0) == '=' {
self.bump();
EqEq
} else if self.nth_char(0) == '>' {
self.bump();
FatArrow
} else {
Eq
}
}
'!' => {
if self.nth_char(0) == '=' {
self.bump();
Ne
} else {
Not
}
}
'<' => match self.nth_char(0) {
'=' => {
self.bump();
Le
}
'<' => {
self.bump();
if self.eat_assign() { ShlEq } else { Shl }
}
'-' => {
self.bump();
LArrow
}
_ => Lt,
},
'>' => match self.nth_char(0) {
'=' => {
self.bump();
Ge
}
'>' => {
self.bump();
if self.eat_assign() { ShrEq } else { Shr }
}
_ => Gt,
},
'-' => {
if self.nth_char(0) == '>' {
self.bump();
RArrow
} else {
if self.eat_assign() { MinusEq } else { Minus }
}
}
'&' => {
if self.nth_char(0) == '&' {
self.bump();
AndAnd
} else {
if self.eat_assign() { AndEq } else { And }
}
}
'|' => {
if self.nth_char(0) == '|' {
self.bump();
OrOr
} else {
if self.eat_assign() { OrEq } else { Or }
}
}
'+' => {
if self.eat_assign() {
PlusEq
} else {
Plus
}
}
'*' => {
if self.eat_assign() {
StarEq
} else {
Star
}
}
'^' => {
if self.eat_assign() {
CaretEq
} else {
Caret
}
}
'%' => {
if self.eat_assign() {
PercentEq
} else {
Percent
}
}
'=' => Eq,
'!' => Not,
'<' => Lt,
'>' => Gt,
'-' => Minus,
'&' => And,
'|' => Or,
'+' => Plus,
'*' => Star,
'^' => Caret,
'%' => Percent,
'\'' => self.lifetime_or_char(),
'"' => {
let terminated = self.double_quoted_string();
Expand Down Expand Up @@ -643,15 +504,6 @@ impl Cursor<'_> {
self.bump();
}
}

fn eat_assign(&mut self) -> bool {
if self.nth_char(0) == '=' {
self.bump();
true
} else {
false
}
}
}

pub mod character_properties {
Expand Down
25 changes: 0 additions & 25 deletions src/libsyntax/parse/lexer/mod.rs
Expand Up @@ -273,9 +273,6 @@ impl<'a> StringReader<'a> {
}
rustc_lexer::TokenKind::Semi => token::Semi,
rustc_lexer::TokenKind::Comma => token::Comma,
rustc_lexer::TokenKind::DotDotDot => token::DotDotDot,
rustc_lexer::TokenKind::DotDotEq => token::DotDotEq,
rustc_lexer::TokenKind::DotDot => token::DotDot,
rustc_lexer::TokenKind::Dot => token::Dot,
rustc_lexer::TokenKind::OpenParen => token::OpenDelim(token::Paren),
rustc_lexer::TokenKind::CloseParen => token::CloseDelim(token::Paren),
Expand All @@ -287,42 +284,20 @@ impl<'a> StringReader<'a> {
rustc_lexer::TokenKind::Pound => token::Pound,
rustc_lexer::TokenKind::Tilde => token::Tilde,
rustc_lexer::TokenKind::Question => token::Question,
rustc_lexer::TokenKind::ColonColon => token::ModSep,
rustc_lexer::TokenKind::Colon => token::Colon,
rustc_lexer::TokenKind::Dollar => token::Dollar,
rustc_lexer::TokenKind::EqEq => token::EqEq,
rustc_lexer::TokenKind::Eq => token::Eq,
rustc_lexer::TokenKind::FatArrow => token::FatArrow,
rustc_lexer::TokenKind::Ne => token::Ne,
rustc_lexer::TokenKind::Not => token::Not,
rustc_lexer::TokenKind::Le => token::Le,
rustc_lexer::TokenKind::LArrow => token::LArrow,
rustc_lexer::TokenKind::Lt => token::Lt,
rustc_lexer::TokenKind::ShlEq => token::BinOpEq(token::Shl),
rustc_lexer::TokenKind::Shl => token::BinOp(token::Shl),
rustc_lexer::TokenKind::Ge => token::Ge,
rustc_lexer::TokenKind::Gt => token::Gt,
rustc_lexer::TokenKind::ShrEq => token::BinOpEq(token::Shr),
rustc_lexer::TokenKind::Shr => token::BinOp(token::Shr),
rustc_lexer::TokenKind::RArrow => token::RArrow,
rustc_lexer::TokenKind::Minus => token::BinOp(token::Minus),
rustc_lexer::TokenKind::MinusEq => token::BinOpEq(token::Minus),
rustc_lexer::TokenKind::And => token::BinOp(token::And),
rustc_lexer::TokenKind::AndEq => token::BinOpEq(token::And),
rustc_lexer::TokenKind::AndAnd => token::AndAnd,
rustc_lexer::TokenKind::Or => token::BinOp(token::Or),
rustc_lexer::TokenKind::OrEq => token::BinOpEq(token::Or),
rustc_lexer::TokenKind::OrOr => token::OrOr,
rustc_lexer::TokenKind::Plus => token::BinOp(token::Plus),
rustc_lexer::TokenKind::PlusEq => token::BinOpEq(token::Plus),
rustc_lexer::TokenKind::Star => token::BinOp(token::Star),
rustc_lexer::TokenKind::StarEq => token::BinOpEq(token::Star),
rustc_lexer::TokenKind::Slash => token::BinOp(token::Slash),
rustc_lexer::TokenKind::SlashEq => token::BinOpEq(token::Slash),
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
rustc_lexer::TokenKind::CaretEq => token::BinOpEq(token::Caret),
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
rustc_lexer::TokenKind::PercentEq => token::BinOpEq(token::Percent),

rustc_lexer::TokenKind::Unknown => {
let c = self.str_from(start).chars().next().unwrap();
Expand Down
32 changes: 20 additions & 12 deletions src/libsyntax/parse/lexer/tests.rs
Expand Up @@ -75,42 +75,50 @@ fn mk_lit(kind: token::LitKind, symbol: &str, suffix: Option<&str>) -> TokenKind
}

#[test]
fn doublecolonparsing() {
fn doublecolon_parsing() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a b".to_string()),
vec![mk_ident("a"), token::Whitespace, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a b".to_string()),
vec![mk_ident("a"), token::Whitespace, mk_ident("b")],
);
})
}

#[test]
fn dcparsing_2() {
fn doublecolon_parsing_2() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a::b".to_string()),
vec![mk_ident("a"), token::ModSep, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a::b".to_string()),
vec![mk_ident("a"), token::Colon, token::Colon, mk_ident("b")],
);
})
}

#[test]
fn dcparsing_3() {
fn doublecolon_parsing_3() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a ::b".to_string()),
vec![mk_ident("a"), token::Whitespace, token::ModSep, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a ::b".to_string()),
vec![mk_ident("a"), token::Whitespace, token::Colon, token::Colon, mk_ident("b")],
);
})
}

#[test]
fn dcparsing_4() {
fn doublecolon_parsing_4() {
with_default_globals(|| {
let sm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let sh = mk_sess(sm.clone());
check_tokenization(setup(&sm, &sh, "a:: b".to_string()),
vec![mk_ident("a"), token::ModSep, token::Whitespace, mk_ident("b")]);
check_tokenization(
setup(&sm, &sh, "a:: b".to_string()),
vec![mk_ident("a"), token::Colon, token::Colon, token::Whitespace, mk_ident("b")],
);
})
}

Expand Down

0 comments on commit 1489095

Please sign in to comment.