diff --git a/.gitignore b/.gitignore index d3b8e4b3..c2d8045d 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ Cargo.lock *.log *.js package-lock.json +moz-central \ No newline at end of file diff --git a/.travis.yml b/.travis.yml index f1bfc771..6e1fae9d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ matrix: before_cache: - rm -rf /home/travis/.cargo/registry script: - - cargo test + - cargo test --features moz_central - cargo run --example major_libs --release after_script: - git add proptest-regressions diff --git a/Cargo.toml b/Cargo.toml index 6d7d2dde..98ad61d5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ress" -version = "0.6.2" +version = "0.6.3" authors = ["Robert Masen "] description = "A sanner/tokenizer for JS files" keywords = ["JavaScript", "parsing", "JS", "ES", "ECMA"] @@ -14,10 +14,12 @@ travis-ci = { repository = "FreeMasen/RESS", branch = "master" } appveyor = { repository = "FreeMasen/RESS", branch = "master", service = "github" } [dependencies] -combine = "3.3" +combine = "3" log = "0.4" unic-ucd-ident = { version = "0.7", features = ["id"] } - +flate2 = { version = "1", optional = true } +reqwest = { version = "0.9", optional = true } +tar = { version = "0.4", optional = true} [dev-dependencies] docopt = "1.0" @@ -26,6 +28,12 @@ serde_derive = "1.0" proptest = "0.8" walkdir = "2.2" pretty_env_logger = "0.2" +regex_generate = "0.2.0" +lazy_static = "1" + +[features] +default = [] +moz_central = ["flate2", "reqwest", "tar"] [[example]] name = "major_libs" diff --git a/bench.md b/bench.md new file mode 100644 index 00000000..38b909e6 --- /dev/null +++ b/bench.md @@ -0,0 +1,13 @@ +| output | refs bench | refs +/- | orig bench | orig +/- | +| --------- | ---------- | -------- | ---------- | -------- | +| bools | 196 | 12 | 498 | 28 | +| comments | 5,830 | 1260 | 8,658 | 198 | +| idents | 32,232 | 947 | 37,967 | 1680 | +| keywords | 23,050 | 6,436 | 20,375 | 197 | +| null | 83 | 5 | 97 | 3 | +| 
numbers | 11,521 | 2,872 | 21,999 | 621 | +| punct | 24,985 | 497 | 34,006 | 1,918 | +| regex | 26,558 | 642 | 71,059 | 2,093 | +| strings | 23,251 | 4,121 | 50,262 | 1,363 | +| templates | 34,015 | 544 | 68,190 | 921 | +| token | 293,377 | 9,556 | 343,875 | 9,141 | diff --git a/benches/major_libs.rs b/benches/major_libs.rs new file mode 100644 index 00000000..d721e6b5 --- /dev/null +++ b/benches/major_libs.rs @@ -0,0 +1,14 @@ +#![cfg(test)] +#![feature(test)] +extern crate ress; +extern crate test; +use ress::Scanner; +use test::Bencher; +#[bench] +fn angular(b: &mut Bencher) { + let js = include_str!("../node_modules/angular/angular.js"); + b.iter(|| { + let s = Scanner::new(js); + let _: Vec = s.collect(); + }) +} diff --git a/benches/numbers.rs b/benches/numbers.rs new file mode 100644 index 00000000..7c36eac9 --- /dev/null +++ b/benches/numbers.rs @@ -0,0 +1,15 @@ +#![cfg(test)] +#![feature(test)] +extern crate combine; +extern crate ress; +extern crate test; + +use combine::Parser; +use ress::numeric::literal as number; +use test::Bencher; +#[bench] +fn number_non_decimal(b: &mut Bencher) { + b.iter(|| { + number().parse("0x5541ff6").unwrap(); + }); +} diff --git a/benches/ref_perf_vs.rs b/benches/ref_perf_vs.rs new file mode 100644 index 00000000..3a3dae26 --- /dev/null +++ b/benches/ref_perf_vs.rs @@ -0,0 +1,415 @@ +#![cfg(test)] +#![feature(test)] +extern crate combine; +extern crate ress; +extern crate test; +#[macro_use] +extern crate lazy_static; + +use combine::Parser; +use test::{black_box, Bencher}; +static KEYWORDS: &[&str] = &[ + "implements", + "interface", + "package", + "private", + "protected", + "public", + "static", + "yield", + "let", + "enum", + "export", + "import", + "super", + "break", + "case", + "catch", + "continue", + "debugger", + "default", + "delete", + "do", + "else", + "finally", + "for", + "function", + "if", + "instanceof", + "in", + "new", + "return", + "switch", + "this", + "throw", + "try", + "typeof", + "var", + "void", + 
"while", + "with", +]; +static PUNCTS: &[&str] = &[ + "{", "}", "(", ")", ".", ";", ",", "[", "]", ":", "?", "~", ">", "<", "=", "!", "+", "-", "/", + "*", "%", "&", "|", "^", ">>>=", //3 char + "...", "===", "!==", ">>>", "<<=", ">>=", "**=", //2 char + "&&", "||", "==", "!=", "+=", "-=", "*=", "/=", "++", "--", "<<", ">>", "&=", "|=", "^=", "%=", + "<=", ">=", "=>", "**", +]; + +static STRINGS: &[&str] = &[ + r#""things and stuff""#, + r#"'people and places'"#, + r#""with and escaped \"""#, + r#"'another escaped \''"#, + r#""with a new \ +line""#, + r#"'another new line \ +hahaha'"#, + "\"sequence double quoted\\\r\nis hard\"", + "'new line sequence\\\r\nmight be harder'", +]; + +static COMMENTS: &[&str] = &[ + "//this is a comment", + "/*this is a +multi-line comment*/", + "", + " with a trailer", +]; + +static NUMBERS: &[&str] = &[ + "0", + "00", + "1234567890", + "01234567", + "0.", + "0.00", + "10.00", + ".0", + ".0", + "0e0", + "0E0", + "0.e0", + "0.00e+0", + ".00e-0", + "0x0", + "0X0", + "0x0123456789abcdefABCDEF", + "0b0", + "0b0100101", + "0o0", + "0o777", + "2e308", +]; +static REGEX: &[&str] = &[ + r#"x/"#, + r#"|/"#, + r#"|||/"#, + r#"^$\b\B/"#, + r#"(?=(?!(?:(.))))/"#, + r#"a.\f\n\r\t\v\0\[\-\/\\\x00\u0000/"#, + r#"\d\D\s\S\w\W/"#, + r#"\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz/"#, + r#"\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ/"#, + r#"[a-z-]/"#, + r#"[^\b\-^]/"#, + r#"[/\]\\]/"#, + r#"./i"#, + r#"./g"#, + r#"./m"#, + r#"./igm"#, + r#".*/"#, + r#".*?/"#, + r#".+/"#, + r#".+?/"#, + r#".?/"#, + r#".??/"#, + r#".{0}/"#, + r#".{0,}/"#, + r#".{0,0}/"#, +]; + +static TEMPLATE_STARTS: &[&str] = &[ + "`things and stuff times ${", + "`things and stuff`", + r#"`a\${b`"#, + r#"`\0\n\x0A\u000A\u{A}${"#, +]; + +static TEMPLATE_CONTINUATIONS: &[&str] = &[ + " and animals and minerals`", + "`}`", + " and animals and minerals`", + " and places and people ${", +]; + +static IDENTS: &[&str] = &[ 
+ r#"$"#, + r#"_"#, + r#"\u0078"#, + r#"x$"#, + r#"x_"#, + r#"x\u0030"#, + r#"xa"#, + r#"x0"#, + r#"x0a"#, + r#"x0123456789"#, + r#"qwertyuiopasdfghjklzxcvbnm"#, + r#"QWERTYUIOPASDFGHJKLZXCVBNM"#, + r#"œ一"#, + r#"ǻ둘"#, + r#"ɤ〩"#, + r#"φ"#, + r#"fiⅷ"#, + r#"ユニコード"#, + r#"x‌‍"#, +]; + +static BOOLS: &[&str] = &["true", "false"]; + +static NULL: &[&str] = &["null"]; + +lazy_static! { + static ref TOKENS: Vec<&'static str> = COMMENTS + .into_iter() + .chain(KEYWORDS.into_iter()) + .chain(NUMBERS.into_iter()) + .chain(PUNCTS.into_iter()) + .chain(IDENTS.into_iter()) + .chain(BOOLS.into_iter()) + .chain(NULL.into_iter()) + .chain(TEMPLATE_STARTS.into_iter()) + .map(|s| *s) + .collect(); +} + +#[bench] +fn keywords(b: &mut Bencher) { + b.iter(|| { + for key in KEYWORDS { + black_box(ress::keywords::literal().parse(*key).unwrap()); + } + }) +} + +#[bench] +fn keywords_ref(b: &mut Bencher) { + b.iter(|| { + for key in KEYWORDS { + black_box(ress::refs::keywords::literal().parse(*key).unwrap()); + } + }) +} + +#[bench] +fn punct(b: &mut Bencher) { + b.iter(|| { + for punct in PUNCTS { + black_box(ress::punct::punctuation().parse(*punct).unwrap()); + } + }); +} + +#[bench] +fn punct_ref(b: &mut Bencher) { + b.iter(|| { + for punct in PUNCTS { + black_box(ress::refs::punct::punctuation().parse(*punct).unwrap()); + } + }); +} + +#[bench] +fn strings(b: &mut Bencher) { + b.iter(|| { + for s in STRINGS { + black_box(ress::strings::literal().parse(*s).unwrap()); + } + }) +} + +#[bench] +fn strings_ref(b: &mut Bencher) { + b.iter(|| { + for s in STRINGS { + black_box(ress::refs::strings::literal().parse(*s).unwrap()); + } + }) +} + +#[bench] +fn comments(b: &mut Bencher) { + b.iter(|| { + for c in COMMENTS { + black_box(ress::comments::comment().parse(*c).unwrap()); + } + }) +} + +#[bench] +fn comments_refs(b: &mut Bencher) { + b.iter(|| { + for c in COMMENTS { + black_box(ress::refs::comments::comment().parse(*c).unwrap()); + } + }) +} + +#[bench] +fn numbers(b: &mut Bencher) { + 
b.iter(|| { + for n in NUMBERS { + black_box(ress::numeric::literal().parse(*n).unwrap()); + } + }) +} + +#[bench] +fn numbers_ref(b: &mut Bencher) { + b.iter(|| { + for n in NUMBERS { + black_box(ress::refs::numbers::literal().parse(*n).unwrap()); + } + }) +} + +#[bench] +fn regex(b: &mut Bencher) { + b.iter(|| { + for r in REGEX { + black_box(ress::regex::regex_tail().parse(*r).unwrap()); + } + }) +} + +#[bench] +fn regex_ref(b: &mut Bencher) { + b.iter(|| { + for r in REGEX { + black_box(ress::refs::regex::regex_tail().parse(*r).unwrap()); + } + }) +} + +#[bench] +fn templates(b: &mut Bencher) { + b.iter(|| { + for t in TEMPLATE_CONTINUATIONS { + black_box(ress::strings::template_continuation().parse(*t).unwrap()); + } + for t in TEMPLATE_STARTS { + black_box(ress::strings::template_start().parse(*t).unwrap()); + } + }) +} + +#[bench] +fn templates_ref(b: &mut Bencher) { + b.iter(|| { + for t in TEMPLATE_CONTINUATIONS { + black_box( + ress::refs::strings::template_continuation() + .parse(*t) + .unwrap(), + ); + } + for t in TEMPLATE_STARTS { + black_box(ress::refs::strings::template_start().parse(*t).unwrap()); + } + }) +} + +#[bench] +fn bools(b: &mut Bencher) { + b.iter(|| { + for b in BOOLS { + black_box(ress::tokens::boolean_literal().parse(*b).unwrap()); + } + }) +} + +#[bench] +fn bools_ref(b: &mut Bencher) { + b.iter(|| { + for b in BOOLS { + black_box(ress::refs::tokens::boolean_literal().parse(*b).unwrap()); + } + }) +} + +#[bench] +fn null(b: &mut Bencher) { + b.iter(|| { + for n in NULL { + black_box(ress::tokens::null_literal().parse(*n).unwrap()); + } + }); +} +#[bench] +fn null_ref(b: &mut Bencher) { + b.iter(|| { + for n in NULL { + black_box(ress::refs::tokens::null_literal().parse(*n).unwrap()); + } + }); +} + +#[bench] +fn idents(b: &mut Bencher) { + b.iter(|| { + for i in IDENTS { + black_box(ress::tokens::ident().parse(*i).unwrap()); + } + }) +} + +#[bench] +fn idents_ref(b: &mut Bencher) { + b.iter(|| { + for i in IDENTS { + 
black_box(ress::refs::tokens::ident().parse(*i).unwrap()); + } + }) +} + +#[bench] +pub fn token(b: &mut Bencher) { + b.iter(|| { + for t in TOKENS.iter() { + black_box(ress::tokens::token().parse(*t).unwrap()); + } + }) +} + +#[bench] +pub fn token_ref(b: &mut Bencher) { + println!("["); + b.iter(|| { + for t in TOKENS.iter() { + println!("{:?}", t); + black_box(ress::refs::tokens::token().parse(*t).unwrap()); + } + println!("];") + }) +} + +#[bench] +fn scanner(b: &mut Bencher) { + let js = include_str!("../node_modules/jquery/dist/jquery.js"); + use ress::{Item, Scanner}; + b.iter(|| { + let s = Scanner::new(js); + black_box(s.collect::>()) + }); +} + +#[bench] +fn scanner_ref(b: &mut Bencher) { + let js = include_str!("../node_modules/jquery/dist/jquery.js"); + use ress::refs::{RefItem as Item, RefScanner as Scanner}; + b.iter(|| { + let s = Scanner::new(js); + black_box(s.collect::>()) + }); +} diff --git a/examples/major_libs/src/main.rs b/examples/major_libs/src/main.rs index 0d2d8234..35beb295 100644 --- a/examples/major_libs/src/main.rs +++ b/examples/major_libs/src/main.rs @@ -12,103 +12,157 @@ use std::{ time::{Duration, SystemTime}, }; +struct Args { + pub refs: bool, + pub angular: bool, + pub jquery: bool, + pub react: bool, + pub react_dom: bool, + pub vue: bool, + pub moment: bool, + pub dexie: bool, +} + +impl ::std::default::Default for Args { + fn default() -> Args { + Args { + refs: false, + angular: false, + jquery: false, + react: false, + react_dom: false, + vue: false, + moment: false, + dexie: false, + } + } +} + +impl Args { + fn pristine(&self) -> bool { + !self.angular + && !self.jquery + && !self.react + && !self.react_dom + && !self.vue + && !self.moment + && !self.dexie + } +} + fn main() { - let mut i = 0; + let mut a = Args::default(); // loop over the ags and check for // lib names. 
If they exist, run the test // and increment the counter for arg in args() { if arg == "jquery" || arg == "jq" { - i += 1; - jquery(); + a.jquery = true; } else if arg == "angular" || arg == "ng" { - i += 1; - angular1(); + a.angular = true; } else if arg == "react" { - i += 1; - react(); + a.react = true; } else if arg == "react-dom" || arg == "rd" { - i += 1; - react_dom(); - } else if arg == "vue" { - i += 1; - vue(); - } else if arg == "moment" { - i += 1; - moment(); - } else if arg == "dexie" { - i += 1; - dexie(); + a.react_dom = true; + } else if arg == "vue" || arg == "v" { + a.vue = true + } else if arg == "moment" || arg == "mt" { + a.moment = true; + } else if arg == "dexie" || arg == "dx" { + a.dexie = true; + } else if arg == "refs" { + a.refs = true; } } - // if no matching args were found, - // perform all the tests - if i == 0 { - jquery(); - angular1(); - react(); - react_dom(); - vue(); - moment(); - dexie(); + if a.jquery { + jquery(a.refs); + } + if a.angular { + angular1(a.refs); + } + if a.react { + react(a.refs); + } + if a.react_dom { + react_dom(a.refs); + } + if a.vue { + vue(a.refs); + } + if a.moment { + moment(a.refs); + } + if a.dexie { + dexie(a.refs); + } + if a.pristine() { + jquery(a.refs); + angular1(a.refs); + react(a.refs); + react_dom(a.refs); + vue(a.refs); + moment(a.refs); + dexie(a.refs); } } -fn jquery() { +fn jquery(refs: bool) { println!("trying jquery"); if let Ok(ref js) = get_js(Lib::Jquery) { - test_js(js, "jquery"); + test_js(js, "jquery", refs); } } -fn angular1() { +fn angular1(refs: bool) { println!("trying angular1"); if let Ok(ref js) = get_js(Lib::Angular) { - test_js(js, "angular"); + test_js(js, "angular", refs); } } -fn react() { +fn react(refs: bool) { println!("trying react"); if let Ok(ref js) = get_js(Lib::React) { - test_js(js, "react"); + test_js(js, "react", refs); } } -fn react_dom() { +fn react_dom(refs: bool) { println!("trying react_dom"); if let Ok(ref js) = get_js(Lib::ReactDom) { - 
test_js(js, "react-dom"); + test_js(js, "react-dom", refs); } } -fn vue() { +fn vue(refs: bool) { println!("trying vue"); if let Ok(ref js) = get_js(Lib::Vue) { - test_js(js, "vue"); + test_js(js, "vue", refs); } } -fn moment() { +fn moment(refs: bool) { println!("trying moment"); if let Ok(ref js) = get_js(Lib::Moment) { - test_js(js, "moment") + test_js(js, "moment", refs) } } -fn dexie() { +fn dexie(refs: bool) { println!("trying dexie"); if let Ok(ref js) = get_js(Lib::Dexie) { - test_js(js, "dexie") + test_js(js, "dexie", refs); } } -fn test_js(text: &str, name: &str) { +fn test_js(text: &str, name: &str, refs: bool) { let size = text.len(); - let now = SystemTime::now(); - let s = ress::Scanner::new(text); - let _: Vec = s.collect(); + if refs { + test_ref(text); + } else { + test(text); + } if let Ok(e) = now.elapsed() { report(size, e, "scanner", name) } else { @@ -116,6 +170,16 @@ fn test_js(text: &str, name: &str) { } } +fn test_ref(text: &str) { + let s = ress::refs::RefScanner::new(text); + let _: Vec = s.collect(); +} + +fn test(text: &str) { + let s = ress::Scanner::new(text); + let _: Vec = s.collect(); +} + fn report(bytes: usize, elapsed: Duration, method: &str, name: &str) { let size = get_size(bytes); println!( diff --git a/examples/semi_finder/src/main.rs b/examples/semi_finder/src/main.rs index 0bfaf228..ac275c6d 100644 --- a/examples/semi_finder/src/main.rs +++ b/examples/semi_finder/src/main.rs @@ -82,5 +82,6 @@ fn check_js(js: &str) -> Vec { } else { None } - }).collect() + }) + .collect() } diff --git a/package.json b/package.json index 9b4e56e9..4a94255c 100644 --- a/package.json +++ b/package.json @@ -1,15 +1,15 @@ { "devDependencies": { "angular": "^1.5.6", + "dexie": "^2.0.4", + "everything.js": "^1.0.3", "jquery": "^3.3.1", + "moment": "^2.22.2", "react": "^16.4.1", "react-dom": "^16.4.1", - "vue": "^2.5.16", - "moment": "^2.22.2", - "dexie": "^2.0.4" + "vue": "^2.5.16" }, "dependencies": { - "esprima": "^4.0.1", - "everything.js": 
"^1.0.3" + "esprima": "^4.0.1" } } diff --git a/src/comments.rs b/src/comments.rs index bad9b3d3..e48f8ba4 100644 --- a/src/comments.rs +++ b/src/comments.rs @@ -73,7 +73,7 @@ impl ToString for Comment { } } -pub(crate) fn comment() -> impl Parser +pub fn comment() -> impl Parser where I: Stream, I::Error: ParseError, @@ -187,8 +187,8 @@ mod test { s.lines() .map(|l| { l.trim() - .trim_left_matches(left_matches) - .trim_right_matches(right_matches) + .trim_start_matches(left_matches) + .trim_end_matches(right_matches) }) .collect::>() .join("\n") diff --git a/src/keywords.rs b/src/keywords.rs index 7056a58a..19ffb39b 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1,5 +1,5 @@ use combine::{ - choice, error::ParseError, not_followed_by, parser::char::string, attempt, Parser, Stream, + attempt, choice, error::ParseError, not_followed_by, parser::char::string, Parser, Stream, }; use tokens::{raw_ident_part, Token}; #[derive(Debug, PartialEq, Clone, Copy)] @@ -202,7 +202,8 @@ impl ::std::string::ToString for Keyword { Keyword::While => "while", Keyword::With => "with", Keyword::Yield => "yield", - }.into() + } + .into() } } @@ -213,7 +214,7 @@ impl Keyword { /// - export /// - implements /// - super - pub fn is_future_reserved(&self) -> bool { + pub fn is_future_reserved(self) -> bool { match self { Keyword::Enum => true, Keyword::Export => true, @@ -235,7 +236,7 @@ impl Keyword { /// - static /// - yield /// - let - pub fn is_strict_reserved(&self) -> bool { + pub fn is_strict_reserved(self) -> bool { match self { Keyword::Implements => true, Keyword::Interface => true, @@ -277,7 +278,7 @@ impl Keyword { /// - void /// - while /// - with - pub fn is_reserved(&self) -> bool { + pub fn is_reserved(self) -> bool { match self { Keyword::Break => true, Keyword::Case => true, @@ -310,7 +311,7 @@ impl Keyword { } } /// generate a parser that will return an instance of Token::Keyword on success -pub(crate) fn literal() -> impl Parser +pub fn literal() -> impl Parser 
where I: Stream, I::Error: ParseError, @@ -319,7 +320,8 @@ where attempt(future_reserved()), attempt(strict_mode_reserved()), attempt(reserved()), - )).skip(not_followed_by(raw_ident_part())) + )) + .skip(not_followed_by(raw_ident_part())) .map(|t| t) } /// generate a parser that will return a Token::Keyword with in finds @@ -355,37 +357,67 @@ where I: Stream, I::Error: ParseError, { - choice([ - attempt(string("await")), - attempt(string("break")), - attempt(string("case")), - attempt(string("catch")), - attempt(string("class")), - attempt(string("const")), - attempt(string("continue")), - attempt(string("debugger")), - attempt(string("default")), - attempt(string("delete")), - attempt(string("do")), - attempt(string("else")), - attempt(string("finally")), - attempt(string("for")), - attempt(string("function")), - attempt(string("if")), - attempt(string("instanceof")), - attempt(string("in")), - attempt(string("new")), - attempt(string("return")), - attempt(string("switch")), - attempt(string("this")), - attempt(string("throw")), - attempt(string("try")), - attempt(string("typeof")), - attempt(string("var")), - attempt(string("void")), - attempt(string("while")), - attempt(string("with")), - ]).map(|t| Token::Keyword(Keyword::from(t.to_owned()))) + choice(( + attempt(reserved_a_to_d()), + attempt(reserved_e_to_r()), + attempt(reserved_s_to_z()), + )) + .map(Token::Keyword) +} + +pub(crate) fn reserved_a_to_d() -> impl Parser +where + I: Stream, + I::Error: ParseError, +{ + choice(( + attempt(string("await").map(|_| Keyword::Await)), + attempt(string("break").map(|_| Keyword::Break)), + attempt(string("case").map(|_| Keyword::Case)), + attempt(string("catch").map(|_| Keyword::Catch)), + attempt(string("class").map(|_| Keyword::Class)), + attempt(string("const").map(|_| Keyword::Const)), + attempt(string("continue").map(|_| Keyword::Continue)), + attempt(string("debugger").map(|_| Keyword::Debugger)), + attempt(string("default").map(|_| Keyword::Default)), + 
attempt(string("delete").map(|_| Keyword::Delete)), + attempt(string("do").map(|_| Keyword::Do)), + )) +} + +pub(crate) fn reserved_e_to_r() -> impl Parser +where + I: Stream, + I::Error: ParseError, +{ + choice(( + attempt(string("else").map(|_| Keyword::Else)), + attempt(string("finally").map(|_| Keyword::Finally)), + attempt(string("for").map(|_| Keyword::For)), + attempt(string("function").map(|_| Keyword::Function)), + attempt(string("if").map(|_| Keyword::If)), + attempt(string("instanceof").map(|_| Keyword::InstanceOf)), + attempt(string("in").map(|_| Keyword::In)), + attempt(string("new").map(|_| Keyword::New)), + attempt(string("return").map(|_| Keyword::Return)), + )) +} +pub(crate) fn reserved_s_to_z() -> impl Parser +where + I: Stream, + I::Error: ParseError, +{ + choice(( + attempt(string("switch").map(|_| Keyword::Switch)), + attempt(string("this").map(|_| Keyword::This)), + attempt(string("throw").map(|_| Keyword::Throw)), + attempt(string("try").map(|_| Keyword::Try)), + attempt(string("typeof").map(|_| Keyword::TypeOf)), + attempt(string("var").map(|_| Keyword::Var)), + attempt(string("void").map(|_| Keyword::Void)), + attempt(string("while").map(|_| Keyword::While)), + attempt(string("with").map(|_| Keyword::With)), + )) } /// Generate a parser that will return an instance of Token::Keyword when one of the /// future reserved words are found @@ -401,11 +433,12 @@ where I::Error: ParseError, { choice(( - attempt(string("export")), - attempt(string("import")), - attempt(string("super")), - attempt(string("enum")), - )).map(|t| Token::Keyword(Keyword::from(t))) + attempt(string("export").map(|_| Keyword::Export)), + attempt(string("import").map(|_| Keyword::Import)), + attempt(string("super").map(|_| Keyword::Super)), + attempt(string("enum").map(|_| Keyword::Enum)), + )) + .map(Token::Keyword) } /// Generate a parser that will return an instance of Token::Keyword when a @@ -427,16 +460,17 @@ where I::Error: ParseError, { choice(( - 
attempt(string("implements")), - attempt(string("interface")), - attempt(string("package")), - attempt(string("private")), - attempt(string("protected")), - attempt(string("public")), - attempt(string("static")), - attempt(string("yield")), - attempt(string("let")), - )).map(|t| Token::Keyword(Keyword::from(t))) + attempt(string("implements").map(|_| Keyword::Implements)), + attempt(string("interface").map(|_| Keyword::Interface)), + attempt(string("package").map(|_| Keyword::Package)), + attempt(string("private").map(|_| Keyword::Private)), + attempt(string("protected").map(|_| Keyword::Protected)), + attempt(string("public").map(|_| Keyword::Public)), + attempt(string("static").map(|_| Keyword::Static)), + attempt(string("yield").map(|_| Keyword::Yield)), + attempt(string("let").map(|_| Keyword::Let)), + )) + .map(Token::Keyword) } #[cfg(test)] diff --git a/src/lib.rs b/src/lib.rs index 9c7b1353..8836f94b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,15 +27,17 @@ extern crate combine; extern crate log; extern crate unic_ucd_ident; +pub mod refs; + use combine::Parser; -mod comments; -mod keywords; -mod numeric; -mod punct; -mod regex; -mod strings; -mod tokens; -mod unicode; +pub mod comments; +pub mod keywords; +pub mod numeric; +pub mod punct; +pub mod regex; +pub mod strings; +pub mod tokens; +pub mod unicode; pub use comments::{Comment, Kind as CommentKind}; pub use keywords::Keyword; pub use numeric::Number; @@ -191,7 +193,7 @@ impl Scanner { if advance_cursor { self.spans.push(span.clone()); self.cursor = self.stream.len() - - pair.1.trim_left_matches(whitespace_or_line_term).len(); + - pair.1.trim_start_matches(whitespace_or_line_term).len(); let whitespace = &self.stream[prev_cursor..self.cursor]; self.pending_new_line = whitespace.chars().any(is_line_term); } @@ -225,7 +227,7 @@ impl Scanner { if advance_cursor { self.spans.push(span.clone()); self.cursor = self.stream.len() - - pair.1.trim_left_matches(whitespace_or_line_term).len(); + - 
pair.1.trim_start_matches(whitespace_or_line_term).len(); let whitespace = &self.stream[prev_cursor..self.cursor]; self.pending_new_line = whitespace.chars().any(|c| is_line_term(c)); } @@ -618,7 +620,7 @@ this.y = 0; // explicit reference to token assert!(i.token.is_keyword()); - // implitic deref to token + // implicit deref to token assert!(i.is_keyword()); } diff --git a/src/numeric.rs b/src/numeric.rs index c4abc7b5..f44703e8 100644 --- a/src/numeric.rs +++ b/src/numeric.rs @@ -1,9 +1,9 @@ use combine::{ - choice, + attempt, choice, error::ParseError, many, many1, optional, parser::char::{char as c_char, digit, hex_digit, oct_digit}, - attempt, Parser, Stream, + Parser, Stream, }; use tokens::Token; @@ -69,17 +69,12 @@ pub enum Kind { Octal, } -pub(crate) fn literal() -> impl Parser +pub fn literal() -> impl Parser where I: Stream, I::Error: ParseError, { - choice(( - attempt(bin_literal()), - attempt(octal_literal()), - attempt(hex_literal()), - attempt(decimal_literal()), - )).map(super::Token::Numeric) + choice((attempt(non_decimal()), attempt(decimal_literal()))).map(super::Token::Numeric) } fn decimal_literal() -> impl Parser @@ -87,7 +82,11 @@ where I: Stream, I::Error: ParseError, { - choice((attempt(full_decimal_literal()), attempt(no_leading_decimal()))).map(|t| t) + choice(( + attempt(full_decimal_literal()), + attempt(no_leading_decimal()), + )) + .map(|t| t) } fn full_decimal_literal() -> impl Parser @@ -105,16 +104,22 @@ where ) .map( |(integer, remainder, exponent): (String, Option<(char, String)>, Option)| { - let mut ret = String::new(); - ret.push_str(&integer); - if let Some((p, r)) = remainder { - ret.push(p); - ret.push_str(&r); - } - if let Some(ex) = exponent { - ret.push_str(&ex); - } - Number(ret) + let remainder = if let Some((_, remainder)) = remainder { + format!(".{}", remainder) + } else { + String::new() + }; + let exponent = if let Some(exp) = exponent { + exp + } else { + String::new() + }; + Number(format!( + 
"{integer}{remainder}{exponent}", + integer = integer, + remainder = remainder, + exponent = exponent, + )) }, ) } @@ -130,12 +135,12 @@ where many1(digit()), ) .map(|(e, sign, value): (char, Option, String)| { - let mut ret = e.to_string(); - if let Some(sign) = sign { - ret.push(sign) - } - ret.push_str(&value); - ret + let sign = if let Some(sign) = sign { + sign.to_string() + } else { + String::new() + }; + format!("{}{}{}", e, sign, value) }) } @@ -145,19 +150,31 @@ where I::Error: ParseError, { (c_char('.'), many1(digit()), optional(exponent())).map( - |(dot, remainder, exponent): (char, String, Option)| { - let mut ret = String::new(); - ret.push(dot); - ret.push_str(&remainder); - if let Some(ex) = exponent { - ret.push_str(&ex); - } - Number(ret) + |(_, remainder, exponent): (_, String, Option)| { + let ex = if let Some(ex) = exponent { + ex + } else { + String::new() + }; + Number(format!(".{}{}", remainder, ex)) }, ) } -fn hex_literal() -> impl Parser +pub fn non_decimal() -> impl Parser +where + I: Stream, + I::Error: ParseError, +{ + choice(( + attempt(hex_literal()), + attempt(octal_literal()), + attempt(bin_literal()), + )) + .map(|(kind, integer): (char, String)| Number(format!("0{}{}", kind, integer))) +} + +fn hex_literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -167,15 +184,10 @@ where choice([c_char('x'), c_char('X')]), many1(hex_digit()), ) - .map(|(zero, x, integer): (char, char, String)| { - let mut ret = format!("{}", zero); - ret.push(x); - ret.push_str(&integer); - Number(ret) - }) + .map(|(_, x, integer): (_, char, String)| (x, integer)) } -fn bin_literal() -> impl Parser +fn bin_literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -185,16 +197,10 @@ where choice([c_char('b'), c_char('B')]), many1(choice([c_char('1'), c_char('0')])), ) - .map(|(zero, b, integer): (char, char, String)| { - let mut ret = String::new(); - ret.push(zero); - ret.push(b); - ret.push_str(&integer); - Number(ret) - }) + .map(|(_, 
b, integer): (_, char, String)| (b, integer)) } -fn octal_literal() -> impl Parser +fn octal_literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -204,13 +210,7 @@ where choice([c_char('o'), c_char('O')]), many1(oct_digit()), ) - .map(|(zero, o, integer): (char, char, String)| { - let mut ret = String::new(); - ret.push(zero); - ret.push(o); - ret.push_str(&integer); - Number(ret) - }) + .map(|(_, o, integer): (_, char, String)| (o, integer)) } #[cfg(test)] diff --git a/src/punct.rs b/src/punct.rs index d67f1228..e3a098de 100644 --- a/src/punct.rs +++ b/src/punct.rs @@ -1,11 +1,12 @@ use combine::{ - choice, + attempt, choice, error::ParseError, not_followed_by, parser::char::{char as c_char, string}, - attempt, Parser, Stream, + Parser, Stream, }; use tokens::Token; + #[derive(Debug, PartialEq, Clone, Copy)] pub enum Punct { And, @@ -186,12 +187,13 @@ impl ::std::string::ToString for Punct { } } } -pub(crate) fn punctuation() -> impl Parser +pub fn punctuation() -> impl Parser where I: Stream, I::Error: ParseError, { - choice((attempt(multi_punct()), attempt(single_punct()))).map(|t: String| Token::Punct(Punct::from(t))) + choice((attempt(multi_punct()), attempt(single_punct()))) + .map(|t: String| Token::Punct(Punct::from(t))) } fn single_punct() -> impl Parser @@ -207,7 +209,11 @@ where I: Stream, I::Error: ParseError, { - choice((attempt(c_char('}')), attempt(normal_punct_not_close_brace()))).map(|c: char| c) + choice(( + attempt(c_char('}')), + attempt(normal_punct_not_close_brace()), + )) + .map(|c: char| c) } fn normal_punct_not_close_brace() -> impl Parser @@ -238,7 +244,8 @@ where attempt(c_char('&')), attempt(c_char('|')), attempt(c_char('^')), - ]).map(|c: char| c) + ]) + .map(|c: char| c) } fn div_punct() -> impl Parser @@ -286,7 +293,8 @@ where attempt(string(">=")), attempt(string("=>")), attempt(string("**")), - ]).map(|t| t.to_string()) + ]) + .map(|t| t.to_string()) } #[cfg(test)] diff --git a/src/refs/comments.rs 
b/src/refs/comments.rs new file mode 100644 index 00000000..6eb098c4 --- /dev/null +++ b/src/refs/comments.rs @@ -0,0 +1,138 @@ +use combine::{ + attempt, choice, eof, + error::ParseError, + optional, + parser::{char::string, repeat::take_until}, + range::recognize, + Parser, RangeStream, Stream, +}; +use refs::tokens::{Comment, RefToken as Token}; +use strings::line_terminator_sequence; + +pub fn comment<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(multi_comment::<'a, I>()), + attempt(single_comment::<'a, I>()), + attempt(html_comment::<'a, I>()), + )) + .map(Token::Comment) +} + +pub(crate) fn single_comment<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(single_comment_new_line::<'a, I>()), + attempt(single_comment_eof::<'a, I>()), + )) + .map(|_| Comment::SingleLine) +} + +fn single_comment_eof<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize((string("//"), take_until::(eof()))) +} + +fn single_comment_new_line<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + string("//"), + take_until::(line_terminator_sequence()), + )) +} + +pub(crate) fn multi_comment<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + multi_line_comment_start(), + take_until::(multi_line_comment_end()), + multi_line_comment_end(), + )) + .map(|_| Comment::MultiLine) +} + +fn multi_line_comment_start<'a, I>() -> impl Parser +where + I: 
Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + combine::range::range("/*".into()).map(|_| ()) +} + +fn multi_line_comment_end<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + combine::range::range("*/".into()).map(|_| ()) +} + +fn html_comment<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + combine::range::range("".into())), + combine::range::range("-->".into()), + optional(take_until::(attempt(line_terminator_sequence()))), + )) + .map(|_| Comment::Html) +} + +#[cfg(test)] +mod test { + use super::*; + static COMMENTS: &[&str] = &[ + "//this is a comment", + "/*this is a +multi-line comment*/", + "", + " with a trailer", + ]; + #[test] + fn ref_comments() { + for c in COMMENTS.iter() { + let result = comment().parse(*c); + assert!(result.is_ok()); + } + } +} diff --git a/src/refs/keywords.rs b/src/refs/keywords.rs new file mode 100644 index 00000000..07428f29 --- /dev/null +++ b/src/refs/keywords.rs @@ -0,0 +1,243 @@ +use combine::{ + attempt, choice, error::ParseError, not_followed_by, range::range, Parser, RangeStream, Stream, +}; + +use keywords::Keyword; +use refs::tokens::RefToken as Token; +use tokens::raw_ident_part; + +/// generate a parser that will return an instance of Token::Keyword on success +pub fn literal<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice((future_reserved(), strict_mode_reserved(), reserved())) + .skip(not_followed_by(raw_ident_part())) + .map(Token::Keyword) +} + +/// generate a parser that will return a Token::Keyword with in finds +/// one of the reserved keywords +/// ## Keywords +/// - break 
+/// - case +/// - catch +/// - continue +/// - debugger +/// - default +/// - delete +/// - do +/// - else +/// - for +/// - function +/// - if +/// - instanceof +/// - in +/// - new +/// - return +/// - switch +/// - this +/// - throw +/// - try +/// - typeof +/// - var +/// - void +/// - while +/// - with +pub(crate) fn reserved<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice((reserved_a_to_d(), reserved_e_to_r(), reserved_s_to_z())) +} + +pub(crate) fn reserved_a_to_d<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(range("await".into()).map(|_| Keyword::Await)), + attempt(range("break".into()).map(|_| Keyword::Break)), + attempt(range("case".into()).map(|_| Keyword::Case)), + attempt(range("catch".into()).map(|_| Keyword::Catch)), + attempt(range("class".into()).map(|_| Keyword::Class)), + attempt(range("const".into()).map(|_| Keyword::Const)), + attempt(range("continue".into()).map(|_| Keyword::Continue)), + attempt(range("debugger".into()).map(|_| Keyword::Debugger)), + attempt(range("default".into()).map(|_| Keyword::Default)), + attempt(range("delete".into()).map(|_| Keyword::Delete)), + attempt(range("do".into()).map(|_| Keyword::Do)), + )) +} + +pub(crate) fn reserved_e_to_r<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(range("else".into()).map(|_| Keyword::Else)), + attempt(range("finally".into()).map(|_| Keyword::Finally)), + attempt(range("for".into()).map(|_| Keyword::For)), + attempt(range("function".into()).map(|_| Keyword::Function)), + attempt(range("if".into()).map(|_| Keyword::If)), + 
attempt(range("instanceof".into()).map(|_| Keyword::InstanceOf)), + attempt(range("in".into()).map(|_| Keyword::In)), + attempt(range("new".into()).map(|_| Keyword::New)), + attempt(range("return".into()).map(|_| Keyword::Return)), + )) +} + +pub(crate) fn reserved_s_to_z<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(range("switch".into()).map(|_| Keyword::Switch)), + attempt(range("this".into()).map(|_| Keyword::This)), + attempt(range("throw".into()).map(|_| Keyword::Throw)), + attempt(range("try".into()).map(|_| Keyword::Try)), + attempt(range("typeof".into()).map(|_| Keyword::TypeOf)), + attempt(range("var".into()).map(|_| Keyword::Var)), + attempt(range("void".into()).map(|_| Keyword::Void)), + attempt(range("while".into()).map(|_| Keyword::While)), + attempt(range("with".into()).map(|_| Keyword::With)), + )) +} + +/// Generate a parser that will return an instance of Token::Keyword when a +/// strict mode reserved word is found +/// +/// ##Keywords +/// - implements +/// - interface +/// - package +/// - private +/// - protected +/// - public +/// - static +/// - yield +/// - let +pub(crate) fn strict_mode_reserved<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(range("implements".into()).map(|_| Keyword::Implements)), + attempt(range("interface".into()).map(|_| Keyword::Interface)), + attempt(range("package".into()).map(|_| Keyword::Package)), + attempt(range("private".into()).map(|_| Keyword::Private)), + attempt(range("protected".into()).map(|_| Keyword::Protected)), + attempt(range("public".into()).map(|_| Keyword::Public)), + attempt(range("static".into()).map(|_| Keyword::Static)), + attempt(range("yield".into()).map(|_| Keyword::Yield)), + 
attempt(range("let".into()).map(|_| Keyword::Let)), + )) +} + +/// Generate a parser that will return an instance of Token::Keyword when one of the +/// future reserved words are found +/// +/// ## Keywords +/// - export +/// - import +/// - super +/// - enum +pub(crate) fn future_reserved<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(range("export".into()).map(|_| Keyword::Export)), + attempt(range("import".into()).map(|_| Keyword::Import)), + attempt(range("super".into()).map(|_| Keyword::Super)), + attempt(range("enum".into()).map(|_| Keyword::Enum)), + )) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn ref_keyword() { + let keywords = [ + "implements", + "interface", + "package", + "private", + "protected", + "public", + "static", + "yield", + "let", + "enum", + "export", + "import", + "super", + "break", + "case", + "catch", + "continue", + "debugger", + "default", + "delete", + "do", + "else", + "finally", + "for", + "function", + "if", + "instanceof", + "in", + "new", + "return", + "switch", + "this", + "throw", + "try", + "typeof", + "var", + "void", + "while", + "with", + ]; + for key in keywords.iter() { + let s = key.to_string(); + let s = s.as_str(); + let result = match literal().easy_parse(s) { + Ok(pair) => pair, + Err(e) => panic!("failed parsing {}\n{}", key, e), + }; + if let Token::Keyword(k) = result.0 { + assert_eq!(k.to_string(), *key); + } + assert_eq!(result.1.len(), 0); + } + } +} diff --git a/src/refs/mod.rs b/src/refs/mod.rs new file mode 100644 index 00000000..91adebcb --- /dev/null +++ b/src/refs/mod.rs @@ -0,0 +1,583 @@ +use combine::Parser; +pub mod comments; +pub mod keywords; +pub mod numbers; +pub mod punct; +pub mod regex; +pub mod strings; +pub mod tokens; + +use super::{is_line_term, whitespace_or_line_term, ScannerState, OpenCurlyKind}; +use keywords::Keyword; 
+use punct::Punct; +pub use refs::tokens::RefToken; +use tokens::Span; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct RefItem { + pub token: RefToken, + pub span: Span, +} + +impl RefItem { + pub fn new(token: RefToken, span: Span) -> Self { + Self { token, span } + } + pub fn is_string(&self) -> bool { + self.token.is_string() + } + pub fn is_eof(&self) -> bool { + self.token.is_eof() + } + pub fn is_template(&self) -> bool { + self.token.is_template_head() + || self.token.is_template_body() + || self.token.is_template_tail() + + } +} + +#[allow(unused)] +pub struct RefScanner { + pub stream: String, + pub eof: bool, + pub cursor: usize, + pub spans: Vec, + last_open_paren_idx: usize, + replacement: usize, + pub pending_new_line: bool, + curly_stack: Vec, +} + +impl RefScanner { + pub fn new(text: impl Into) -> Self { + let text = text.into(); + let cursor = text.len() - text.trim_start_matches(super::whitespace).len(); + Self { + stream: text, + eof: false, + cursor, + spans: Vec::new(), + last_open_paren_idx: 0, + replacement: 0, + pending_new_line: false, + curly_stack: Vec::new(), + } + } +} + +impl Iterator for RefScanner { + type Item = RefItem; + fn next(&mut self) -> Option { + self.get_next_token(true) + } +} + +impl RefScanner { + /// Attempts to look ahead 1 token + /// + /// Similar to how `Peekable::peek` works however the + /// returned value will not be a borrowed `Item`. Since + /// there isn't a borrow happening this essentially duplicates + /// the cost of calling `next`. 
+    ///
+    /// ```
+    /// # extern crate ress;
+    /// # use ress::{Scanner,Token};
+    /// # fn main() {
+    /// let js = "function thing() { return; }";
+    /// let mut s = Scanner::new(js);
+    /// assert_eq!(s.look_ahead().unwrap().token, Token::keyword("function"));
+    /// assert_eq!(s.next().unwrap().token, Token::keyword("function"));
+    /// # }
+    /// ```
+    pub fn look_ahead(&mut self) -> Option {
+        self.get_next_token(false)
+    }
+    /// Skip any upcoming comments to get the
+    /// next valid js token
+    ///
+    /// Each upcoming token is peeked with `look_ahead` and only consumed when
+    /// it is a comment, so the first non-comment token (and the span list,
+    /// brace stack and `eof` flag that go with it) is left untouched for the
+    /// following call to `next`.
+    pub fn skip_comments(&mut self) {
+        debug!(target: "ress", "skipping comments");
+        let start = self.cursor;
+        while let Some(RefItem { token: RefToken::Comment(_), .. }) = self.look_ahead() {
+            let _ = self.next();
+        }
+        debug!(target: "ress", "skipped {} bytes worth of comments", self.cursor.saturating_sub(start));
+    }
+    /// Get a copy of the scanner's current state
+    ///
+    /// The snapshot holds the cursor, the number of recorded spans, the index
+    /// of the last open paren, `replacement` and a clone of the brace stack;
+    /// `eof` and `pending_new_line` are not part of it.
+    pub fn get_state(&self) -> ScannerState {
+        ScannerState {
+            cursor: self.cursor,
+            spans_len: self.spans.len(),
+            last_paren: self.last_open_paren_idx,
+            replacement: self.replacement,
+            curly_stack: self.curly_stack.clone(),
+        }
+    }
+    /// Set the scanner's current state to the state provided
+    ///
+    /// Spans recorded after the snapshot was taken are dropped by truncating
+    /// the span list back to the saved length.
+    pub fn set_state(&mut self, state: ScannerState) {
+        self.cursor = state.cursor;
+        self.spans.truncate(state.spans_len);
+        self.last_open_paren_idx = state.last_paren;
+        self.replacement = state.replacement;
+        self.curly_stack = state.curly_stack;
+    }
+
+    /// Parse the token that starts at the cursor.
+    ///
+    /// When `advance_cursor` is `true` the token is consumed: its span is
+    /// recorded, the cursor moves past the token and any trailing whitespace,
+    /// and the brace/paren/eof bookkeeping is updated. When it is `false`
+    /// (look-ahead) the token is returned but no scanner state is modified.
+    ///
+    /// Returns `None` once the end-of-file token has been consumed.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the text at the cursor cannot be parsed as a token, a regex
+    /// tail or a template continuation.
+    fn get_next_token<'a>(&mut self, advance_cursor: bool) -> Option {
+        if self.eof {
+            debug!(target: "ress", "end of iterator, returning None");
+            return None;
+        };
+        let prev_cursor = self.cursor;
+        let result = self::tokens::token().parse(&self.stream[self.cursor..]);
+        match result {
+            Ok(pair) => {
+                // A `/` or `/=` is re-read as a regex literal when the
+                // preceding tokens say an expression may start here.
+                if (pair.0.matches_punct(&Punct::ForwardSlash)
+                    || pair.0.matches_punct(&Punct::DivideAssign))
+                    && self.is_regex_start() {
+                    match regex::regex_tail().parse(&self.stream[self.cursor + 1..]) {
+                        Ok(regex_pair) => {
+                            let full_len = self.stream.len();
+                            let span_end = full_len - regex_pair.1.len();
+                            let span = Span::new(self.cursor, span_end);
+                            if advance_cursor {
+                                self.spans.push(span.clone());
+                                self.cursor = self.stream.len()
+                                    - regex_pair
+                                        .1
+                                        .trim_start_matches(whitespace_or_line_term)
+                                        .len();
+                                let whitespace = &self.stream[prev_cursor..self.cursor];
+                                self.pending_new_line = whitespace.chars().any(is_line_term);
+                            }
+                            debug!(target: "ress", "{}: {:?}", if advance_cursor { "next regex item" } else {"look ahead"}, regex_pair.0);
+                            Some(RefItem::new(regex_pair.0, span))
+                        }
+                        Err(e) => panic!(
+                            "Failed to parse token last successful parse ended {}\nError: {}",
+                            self.cursor, e,
+                        ),
+                    }
+                // A `}` while the innermost open brace belongs to a template
+                // substitution resumes the template instead of closing a block.
+                } else if pair.0.matches_punct(&Punct::CloseBrace) && self.looking_for_template_end() {
+                    match strings::template_continuation().parse(pair.1) {
+                        Ok(pair) => {
+                            if pair.0.is_template_tail() && advance_cursor {
+                                let _ = self.curly_stack.pop();
+                            }
+                            let full_len = self.stream.len();
+                            let span_end = full_len - pair.1.len();
+                            let span = Span::new(self.cursor, span_end);
+                            if advance_cursor {
+                                self.spans.push(span.clone());
+                                self.cursor = self.stream.len()
+                                    - pair.1.trim_start_matches(whitespace_or_line_term).len();
+                                let whitespace = &self.stream[prev_cursor..self.cursor];
+                                self.pending_new_line = whitespace.chars().any(is_line_term);
+                            }
+                            debug!("{}: {:?}", if advance_cursor { "next template item" } else {"look ahead"}, pair.0);
+                            Some(RefItem::new(pair.0, span))
+                        }
+                        Err(e) => panic!(
+                            "Failed to parse token last successful parse ended {}\nError: {}",
+                            self.cursor, e,
+                        ),
+                    }
+                } else {
+                    // Brace bookkeeping is only committed when the token is
+                    // consumed; a look-ahead must leave `curly_stack` as it was,
+                    // otherwise `look_ahead` followed by `next` counts a brace twice.
+                    if pair.0.matches_punct(&Punct::OpenBrace) && advance_cursor {
+                        self.curly_stack.push(OpenCurlyKind::Block);
+                    }
+                    if pair.0.matches_punct(&Punct::CloseBrace) && advance_cursor {
+                        let _ = self.curly_stack.pop();
+                    }
+                    if pair.0.matches_punct(&Punct::OpenParen) && advance_cursor {
+                        // index the paren's own span will get once pushed below
+                        self.last_open_paren_idx = self.spans.len();
+                    }
+                    if pair.0.is_eof() && advance_cursor {
+                        self.eof = true;
+                    }
+                    if pair.0.is_template_head() && advance_cursor && !pair.0.is_template_tail() {
+                        self.curly_stack.push(OpenCurlyKind::Template);
+                    }
+                    let full_len = self.stream.len();
+                    let span_end = full_len - pair.1.len();
+                    let span = Span::new(self.cursor, span_end);
+                    if advance_cursor {
+                        self.spans.push(span.clone());
+                        self.cursor = self.stream.len()
+                            - pair
+                                .1
+                                .trim_start_matches(super::whitespace_or_line_term)
+                                .len();
+                        let whitespace = &self.stream[prev_cursor..self.cursor];
+                        self.pending_new_line = whitespace.chars().any(super::is_line_term);
+                    }
+                    info!(target: "ress", "{}: {:?}", if advance_cursor { "next item" } else {"look ahead"}, pair.0);
+                    Some(RefItem::new(pair.0, span))
+                }
+            }
+            Err(e) => panic!(
+                "Failed to parse token last successful parse ended {}\nError: {}",
+                self.cursor, e,
+            ),
+        }
+    }
+
+    /// `true` when the innermost unclosed brace was opened by a template
+    /// substitution rather than by a block.
+    fn looking_for_template_end(&self) -> bool {
+        if let Some(last) = self.curly_stack.last() {
+            last == &OpenCurlyKind::Template
+        } else {
+            false
+        }
+    }
+
+    /// Decide whether a `/` at the cursor starts a regex literal, based on the
+    /// last non-comment token that was consumed.
+    ///
+    /// With no previous token a regex is assumed. After anything that is
+    /// neither a keyword nor punctuation, after `this` and after `]` it is not
+    /// a regex; `)` and `}` defer to `check_for_conditional` and
+    /// `check_for_func`; any other keyword or punctuation allows a regex.
+    fn is_regex_start(&self) -> bool {
+        if let Some(last_token) = self.last_token() {
+            if (!last_token.is_keyword() && !last_token.is_punct())
+                || last_token.matches_keyword(&Keyword::This)
+                || last_token.matches_punct(&Punct::CloseBracket)
+            {
+                false
+            } else if last_token.matches_punct(&Punct::CloseParen) {
+                self.check_for_conditional()
+            } else if last_token.matches_punct(&Punct::CloseBrace) {
+                self.check_for_func()
+            } else {
+                true
+            }
+        } else {
+            true
+        }
+    }
+
+    /// Re-parse the recorded spans from newest to oldest and return the first
+    /// token that is not a comment, or `None` when there is none.
+    ///
+    /// The walk includes the very first span and skips any span that no longer
+    /// parses, so it always terminates.
+    fn last_token(&self) -> Option {
+        self.spans
+            .iter()
+            .rev()
+            .filter_map(|span| self.token_for(span))
+            .find(|token| !token.is_comment())
+    }
+
+    /// After a `)`: `true` (a regex may follow) when the token just before the
+    /// last recorded `(` is `if`, `for`, `while` or `with`.
+    ///
+    /// When no token precedes that `(` the parenthesised group cannot be a
+    /// statement head, so the `/` is treated as division.
+    fn check_for_conditional(&self) -> bool {
+        if let Some(before) = self.nth_before_last_open_paren(1) {
+            before.matches_keyword(&Keyword::If)
+                || before.matches_keyword(&Keyword::For)
+                || before.matches_keyword(&Keyword::While)
+                || before.matches_keyword(&Keyword::With)
+        } else {
+            false
+        }
+    }
+
+    /// After a `}`: inspect the tokens in front of the last recorded `(` to
+    /// tell whether the brace closed a function used as an expression.
+    ///
+    /// For `ident(` the token three places before the paren is tested with
+    /// `check_for_expression`, for `function(` the token two places before;
+    /// a `function` with nothing in front of it yields `false`, every other
+    /// case yields `true`.
+    fn check_for_func(&self) -> bool {
+        if let Some(before) = self.nth_before_last_open_paren(1) {
+            if before.is_ident() {
+                if let Some(three_before) = self.nth_before_last_open_paren(3) {
+                    return Self::check_for_expression(&three_before);
+                }
+            } else if before.matches_keyword(&Keyword::Function) {
+                if let Some(two_before) = self.nth_before_last_open_paren(2) {
+                    return Self::check_for_expression(&two_before);
+                } else {
+                    return false;
+                }
+            }
+        }
+        true
+    }
+
+    /// Test whether `token` can precede a function expression.
+    ///
+    /// NOTE(review): as written the chain starts with a positive `(` test, so
+    /// the negated clauses that follow look redundant and the result appears
+    /// to be just "token is `(`" -- confirm whether a membership test over the
+    /// listed operators and keywords was intended before changing it.
+    fn check_for_expression(token: &RefToken) -> bool {
+        token.matches_punct(&Punct::OpenParen)
+            && !token.matches_punct(&Punct::OpenBrace)
+            && !token.matches_punct(&Punct::OpenBracket)
+            && !token.matches_punct(&Punct::Assign)
+            && !token.matches_punct(&Punct::AddAssign)
+            && !token.matches_punct(&Punct::SubtractAssign)
+            && !token.matches_punct(&Punct::MultiplyAssign)
+            && !token.matches_punct(&Punct::ExponentAssign)
+            && !token.matches_punct(&Punct::DivideAssign)
+            && !token.matches_punct(&Punct::ModuloAssign)
+            && !token.matches_punct(&Punct::LeftShiftAssign)
+            && !token.matches_punct(&Punct::RightShiftAssign)
+            && !token.matches_punct(&Punct::UnsignedRightShiftAssign)
+            && !token.matches_punct(&Punct::BitwiseAndAssign)
+            && !token.matches_punct(&Punct::BitwiseOrAssign)
+            && !token.matches_punct(&Punct::BitwiseXOrAssign)
+            && !token.matches_punct(&Punct::Comma)
+            && !token.matches_punct(&Punct::Plus)
+            && !token.matches_punct(&Punct::Minus)
+            && !token.matches_punct(&Punct::Asterisk)
+            && !token.matches_punct(&Punct::Exponent)
+            && !token.matches_punct(&Punct::ForwardSlash)
+            && !token.matches_punct(&Punct::Modulo)
+            && !token.matches_punct(&Punct::Increment)
+            && !token.matches_punct(&Punct::Decrement)
+            && !token.matches_punct(&Punct::LeftShift)
+            && !token.matches_punct(&Punct::RightShift)
+            && !token.matches_punct(&Punct::UnsignedRightShift)
+            && !token.matches_punct(&Punct::And)
+            && !token.matches_punct(&Punct::Pipe)
+            && !token.matches_punct(&Punct::Caret)
+            && !token.matches_punct(&Punct::Not)
+            && !token.matches_punct(&Punct::BitwiseNot)
+            && !token.matches_punct(&Punct::LogicalAnd)
+            && !token.matches_punct(&Punct::LogicalOr)
+            && !token.matches_punct(&Punct::QuestionMark)
+            && !token.matches_punct(&Punct::Colon)
+            && !token.matches_punct(&Punct::StrictEquals)
+            && !token.matches_punct(&Punct::Equal)
+            && !token.matches_punct(&Punct::GreaterThanEqual)
+            && !token.matches_punct(&Punct::LessThanEqual)
+            && !token.matches_punct(&Punct::LessThan)
+            && !token.matches_punct(&Punct::GreaterThan)
+            && !token.matches_punct(&Punct::NotEqual)
+            && !token.matches_punct(&Punct::StrictNotEquals)
+            && !token.matches_keyword(&Keyword::In)
+            && !token.matches_keyword(&Keyword::TypeOf)
+            && !token.matches_keyword(&Keyword::InstanceOf)
+            && !token.matches_keyword(&Keyword::New)
+            && !token.matches_keyword(&Keyword::Return)
+            && !token.matches_keyword(&Keyword::Case)
+            && !token.matches_keyword(&Keyword::Delete)
+            && !token.matches_keyword(&Keyword::Throw)
+            && !token.matches_keyword(&Keyword::Void)
+    }
+
+    /// Re-parse the token recorded `n` spans before the last open paren.
+    ///
+    /// Returns `None` when fewer than `n` spans precede that paren, when the
+    /// resulting index is not a recorded span, or when the span no longer
+    /// parses as a token.
+    fn nth_before_last_open_paren(&self, n: usize) -> Option {
+        // checked_sub: the paren may be the first token, so `idx - n` can underflow
+        let idx = self.last_open_paren_idx.checked_sub(n)?;
+        self.token_for(self.spans.get(idx)?)
+    }
+
+    /// Re-parse the text covered by `span` and return its token, or `None`
+    /// when the span does not address valid text in the stream or the text
+    /// does not parse.
+    fn token_for(&self, span: &Span) -> Option {
+        let text = self.stream.get(span.start..span.end)?;
+        self::tokens::token().parse(text).ok().map(|pair| pair.0)
+    }
+
+    /// Copy the source text covered by `span`.
+    ///
+    /// Returns `None` when the span is out of bounds, inverted, or does not
+    /// fall on character boundaries of the stream.
+    pub fn string_for(&self, span: &Span) -> Option {
+        self.stream.get(span.start..span.end).map(str::to_string)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    // #[test]
+    // fn tokenizer() {
+    // let js = "
+    // 'use strict';
+    // function thing() {
+    // let x = 0;
+    // console.log('stuff');
+    // }";
+    // let expectation = vec![
+    // RefToken::String(StringLit::Single),
+    // 
RefToken::Punct(Punct::SemiColon), + // RefToken::Keyword(Keyword::Function), + // RefToken::Ident, + // RefToken::Punct(Punct::OpenParen), + // RefToken::Punct(Punct::CloseParen), + // RefToken::Punct(Punct::OpenBrace), + // RefToken::Keyword(Keyword::Let), + // RefToken::Ident, + // RefToken::Punct(Punct::Assign), + // RefToken::Numeric(super::tokens::Number::Dec), + // RefToken::punct(Punct::SemiColon), + // RefToken::Ident, + // RefToken::Punct(Punct::Period), + // RefToken::Ident, + // RefToken::Punct(Punct::OpenParen), + // RefToken::String(StringLit::Single), + // RefToken::punct(Punct::CloseParen), + // RefToken::punct(Punct::SemiColon), + // RefToken::punct(Punct::CloseBrace), + // RefToken::EoF, + // ]; + // for tok in tokenize(js).into_iter().zip(expectation.into_iter()) { + // assert_eq!(tok.0, tok.1); + // } + // } + + #[test] + fn ref_scanner() { + let s = super::RefScanner::new( + "(function() { +this.x = 100; +this.y = 0; +})();", + ); + let expected = vec![ + RefToken::Punct(Punct::OpenParen), //"(" + RefToken::Keyword(Keyword::Function), + RefToken::Punct(Punct::OpenParen), //"(" + RefToken::Punct(Punct::CloseParen), //")" + RefToken::Punct(Punct::OpenBrace), //"{" + RefToken::Keyword(Keyword::This), + RefToken::Punct(Punct::Period), //"." + RefToken::Ident, + RefToken::Punct(Punct::Assign), //"=" + RefToken::Numeric(tokens::Number::Dec), + RefToken::Punct(Punct::SemiColon), //";" + RefToken::Keyword(Keyword::This), + RefToken::Punct(Punct::Period), //"." 
+ RefToken::Ident, + RefToken::Punct(Punct::Assign), //"=" + RefToken::Numeric(tokens::Number::Dec), + RefToken::Punct(Punct::SemiColon), //";" + RefToken::Punct(Punct::CloseBrace), //"}" + RefToken::Punct(Punct::CloseParen), //")" + RefToken::Punct(Punct::OpenParen), //"(" + RefToken::Punct(Punct::CloseParen), //")" + RefToken::Punct(Punct::SemiColon), //";" + RefToken::EoF, + ]; + validate(s, expected); + } + + // #[test] + // fn template_one_sub() { + // let one_sub = "`things and stuff times ${x}`"; + // let s = Scanner::new(one_sub); + // let expected = vec![ + // Token::template_head("things and stuff times "), + // Token::ident("x"), + // Token::template_tail(""), + // ]; + // validate(s, expected); + // } + + // #[test] + // fn template_two_subs() { + // let two_subs = "`things and stuff times ${x} divided by ${y}`"; + // let s = Scanner::new(two_subs); + // let expected = vec![ + // Token::template_head("things and stuff times "), + // Token::ident("x"), + // Token::template_middle(" divided by "), + // Token::ident("y"), + // Token::template_tail(""), + // ]; + // validate(s, expected); + // } + // #[test] + // fn multiline_template() { + // let plain = "`things and + // stuff`"; + // let p_r = tokens::token().parse(plain).unwrap(); + // assert_eq!( + // p_r, + // (Token::no_sub_template(&plain[1..plain.len() - 1]), "") + // ); + // let subbed = "`things and + // stuff times ${x}`"; + // let s = Scanner::new(subbed); + // let expected = vec![ + // Token::template_head("things and\n stuff times "), + // Token::ident("x"), + // Token::template_tail(""), + // ]; + // validate(s, expected); + // } + // #[test] + // fn nested_template() { + // let test = "`outer ${`inner ${0}`}`"; + // let expected = vec![ + // Token::template_head("outer "), + // Token::template_head("inner "), + // Token::numeric("0"), + // Token::template_tail(""), + // Token::template_tail(""), + // ]; + // let s = Scanner::new(test); + // validate(s, expected); + // } + // #[test] + // fn 
look_ahead() { + // let js = "function() { return; }"; + // let mut s = Scanner::new(js); + // loop { + // let peek = s.look_ahead(); + // let next = s.next(); + // assert_eq!(peek, next); + // if peek.is_none() { + // break; + // } + // } + // } + + fn validate(s: RefScanner, expected: Vec) { + for (i, (lhs, rhs)) in s.zip(expected.into_iter()).enumerate() { + assert_eq!((i, lhs.token), (i, rhs)); + } + } + + // #[test] + // fn get_str() { + // let js = "function ( ) { return ; }"; + // let mut s = Scanner::new(js); + // let strs = js.split(' '); + // for (i, p) in strs.enumerate() { + // let item = s.next().unwrap(); + // let q = s.string_for(&item.span).unwrap(); + // assert_eq!((i, p.to_string()), (i, q)) + // } + // } + + // #[test] + // fn item_deref_to_token() { + // let js = "function ( ) { return ; }"; + // let mut s = Scanner::new(js); + // let i: Item = s.next().unwrap(); + + // // explicit reference to token + // assert!(i.token.is_keyword()); + // // implicit deref to token + // assert!(i.is_keyword()); + // } + + // #[test] + // fn spans() { + // let js = include_str!("../node_modules/esprima/dist/esprima.js"); + // let mut s = Scanner::new(js); + // while let Some(ref item) = s.next() { + // let from_stream = &s.stream[item.span.start..item.span.end]; + // let token = item.token.to_string(); + + // if from_stream != token { + // panic!("token mismatch {:?} \n{}\n{}\n", item, from_stream, token); + // } + // } + // } + + // #[test] + // fn local_host_regex() { + // let js = r#"/^(http|https):\/\/(localhost|127\.0\.0\.1)/"#; + // let mut s = Scanner::new(js); + // let r = s.next().unwrap(); + // assert_eq!( + // r.token, + // Token::regex(r#"^(http|https):\/\/(localhost|127\.0\.0\.1)"#, None) + // ); + // } +} diff --git a/src/refs/numbers.rs b/src/refs/numbers.rs new file mode 100644 index 00000000..71cd4efa --- /dev/null +++ b/src/refs/numbers.rs @@ -0,0 +1,166 @@ +use combine::{ + attempt, choice, + error::ParseError, + many, many1, optional, + 
parser::char::{char as c_char, digit, hex_digit, oct_digit}, + range::recognize, + Parser, RangeStream, Stream, +}; + +use refs::tokens::{Number, RefToken as Token}; + +pub fn literal<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice((attempt(non_decimal()), attempt(decimal_literal::<'a, I>()))).map(Token::Numeric) +} + +fn decimal_literal<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(full_decimal_literal()), + attempt(no_leading_decimal()), + )) + .map(|_| Number::Dec) +} + +fn full_decimal_literal<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + //any number of digits + many1::(digit()), + //optionally followed by a . and any number of digits + optional((c_char('.'), many::(digit()))), + //optionally followed by e|E and any number of digits + optional(exponent()), + )) + .map(|_| ()) +} + +fn exponent<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + choice([c_char('e'), c_char('E')]), + optional(choice([c_char('-'), c_char('+')])), + many1::(digit()), + )) + .map(|_| ()) +} + +fn no_leading_decimal<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + c_char('.'), + many1::(digit()), + optional(exponent()), + )) + .map(|_| ()) +} + +pub fn non_decimal<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(hex_literal::<'a, I>()), + attempt(octal_literal::<'a, I>()), + attempt(bin_literal::<'a, I>()), + )) +} + +fn 
hex_literal<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + c_char('0'), + choice([c_char('x'), c_char('X')]), + many1::(hex_digit()), + )) + .map(|_| Number::Hex) +} + +fn bin_literal<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + c_char('0'), + choice([c_char('b'), c_char('B')]), + many1::(choice([c_char('1'), c_char('0')])), + )) + .map(|_| Number::Bin) +} + +fn octal_literal<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + c_char('0'), + choice([c_char('o'), c_char('O')]), + many1::(oct_digit()), + )) + .map(|_| Number::Oct) +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn ref_numbers() { + let numbers = [ + "1", + "123.345", + "123.345e11", + ".99E23", + "0x77afd", + "0o7777", + "0b010101001", + ]; + + for num in numbers.iter() { + let result = literal().easy_parse(*num).unwrap(); + assert!(result.1.len() == 0); + } + } +} diff --git a/src/refs/punct.rs b/src/refs/punct.rs new file mode 100644 index 00000000..513454ee --- /dev/null +++ b/src/refs/punct.rs @@ -0,0 +1,630 @@ +use combine::{ + attempt, choice, + error::ParseError, + not_followed_by, + parser::char::{char as c_char, string}, + range::recognize, + Parser, Stream, +}; +use punct::Punct; +use refs::tokens::RefToken as Token; + +pub fn punctuation() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice((attempt(multi_punct()), attempt(single_punct()))).map(Token::Punct) +} + +fn single_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: 
combine::stream::Range, + I::Error: ParseError, +{ + choice((attempt(normal_punct()), attempt(div_punct()))) +} + +fn normal_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(recognize(c_char('}'))).map(|_| Punct::CloseBrace), + attempt(normal_punct_not_close_brace()), + )) +} + +fn normal_punct_not_close_brace() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(open_brace()), + attempt(open_paren()), + attempt(close_paren()), + attempt(period()), + attempt(semi()), + attempt(comma()), + attempt(open_bracket()), + attempt(close_bracket()), + attempt(colon()), + attempt(question()), + attempt(tilde()), + attempt(gt()), + attempt(lt()), + attempt(assign()), + attempt(bang()), + attempt(plus()), + attempt(minus()), + attempt(mul()), + attempt(modulo()), + attempt(bit_and()), + attempt(pipe()), + attempt(xor()), + )) +} + +fn div_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('/').skip(not_followed_by(c_char('*')))).map(|_| Punct::ForwardSlash) +} + +fn multi_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(four_char_punct()), + attempt(three_char_punct()), + attempt(two_char_punct()) + )) +} + +fn four_char_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + unsigned_rhs_assign().map(|_| Punct::UnsignedRightShiftAssign) +} + +fn three_char_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + spread(), + attempt(strict_equals()), + attempt(strict_not_equals()), + 
attempt(unsigned_rhs()), + attempt(lhs_assign()), + attempt(rhs_assign()), + attempt(exp_assign()), + )) +} + +fn two_char_punct() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(logical_and()), + attempt(logical_or()), + attempt(equal()), + attempt(not_equal()), + attempt(add_assign()), + attempt(sub_assign()), + attempt(mul_assign()), + attempt(div_assign()), + attempt(increment()), + attempt(decrement()), + attempt(lhs()), + attempt(rhs()), + attempt(and_assign()), + attempt(or_assign()), + attempt(xor_assign()), + attempt(mod_assign()), + attempt(leq()), + attempt(geq()), + attempt(fat_arrow()), + attempt(exp()), + )) +} + +fn open_brace() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('{')).map(|_| Punct::OpenBrace) +} +fn open_paren() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('(')).map(|_| Punct::OpenParen) +} +fn close_paren() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char(')')).map(|_| Punct::CloseParen) +} +fn period() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('.')).map(|_| Punct::Period) +} +fn semi() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char(';')).map(|_| Punct::SemiColon) +} +fn comma() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char(',')).map(|_| Punct::Comma) +} +fn open_bracket() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + 
::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('[')).map(|_| Punct::OpenBracket) +} +fn close_bracket() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char(']')).map(|_| Punct::CloseBracket) +} +fn colon() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char(':')).map(|_| Punct::Colon) +} +fn question() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('?')).map(|_| Punct::QuestionMark) +} +fn tilde() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('~')).map(|_| Punct::BitwiseNot) +} +fn gt() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('>')).map(|_| Punct::GreaterThan) +} +fn lt() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('<')).map(|_| Punct::LessThan) +} +fn assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('=')).map(|_| Punct::Assign) +} +fn bang() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('!')).map(|_| Punct::Not) +} +fn plus() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('+')).map(|_| Punct::Plus) +} +fn minus() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + 
recognize(c_char('-')).map(|_| Punct::Minus) +} +fn mul() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('*')).map(|_| Punct::Asterisk) +} +fn modulo() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('%')).map(|_| Punct::Modulo) +} +fn bit_and() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('&')).map(|_| Punct::And) +} +fn pipe() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('|')).map(|_| Punct::Pipe) +} +fn xor() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(c_char('^')).map(|_| Punct::Caret) +} + +fn unsigned_rhs_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(">>>=")).map(|_| Punct::UnsignedRightShiftAssign) +} +fn spread() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("...")).map(|_| Punct::Spread) +} +fn strict_equals() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("===")).map(|_| Punct::StrictEquals) +} +fn strict_not_equals() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("!==")).map(|_| Punct::StrictNotEquals) +} +fn unsigned_rhs() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + 
recognize(string(">>>")).map(|_| Punct::UnsignedRightShift) +} +fn lhs_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("<<=")).map(|_| Punct::LeftShiftAssign) +} +fn rhs_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(">>=")).map(|_| Punct::RightShiftAssign) +} +fn exp_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("**=")).map(|_| Punct::ExponentAssign) +} +fn logical_and() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("&&")).map(|_| Punct::LogicalAnd) +} +fn logical_or() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("||")).map(|_| Punct::LogicalOr) +} +fn equal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("==")).map(|_| Punct::Equal) +} +fn not_equal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("!=")).map(|_| Punct::NotEqual) +} +fn add_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("+=")).map(|_| Punct::AddAssign) +} +fn sub_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("-=")).map(|_| Punct::SubtractAssign) +} +fn mul_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + 
I::Error: ParseError, +{ + recognize(string("*=")).map(|_| Punct::MultiplyAssign) +} +fn div_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("/=")).map(|_| Punct::DivideAssign) +} +fn increment() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("++")).map(|_| Punct::Increment) +} +fn decrement() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("--")).map(|_| Punct::Decrement) +} +fn lhs() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("<<")).map(|_| Punct::LeftShift) +} +fn rhs() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(">>")).map(|_| Punct::RightShift) +} +fn and_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("&=")).map(|_| Punct::BitwiseAndAssign) +} +fn or_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("|=")).map(|_| Punct::BitwiseOrAssign) +} +fn xor_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("^=")).map(|_| Punct::BitwiseXOrAssign) +} +fn mod_assign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("%=")).map(|_| Punct::ModuloAssign) +} +fn leq() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: 
combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("<=")).map(|_| Punct::LessThanEqual) +} +fn geq() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(">=")).map(|_| Punct::GreaterThanEqual) +} +fn fat_arrow() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("=>")).map(|_| Punct::FatArrow) +} +fn exp() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("**")).map(|_| Punct::Exponent) +} + + +#[cfg(test)] +mod test { + use crate::{ + Punct, + refs::{ + RefToken + }, + }; + use combine::Parser; + #[test] + fn gt_eq() { + let js = ">="; + let expectation = RefToken::Punct(Punct::GreaterThanEqual); + let parsed = super::punctuation().parse(js).unwrap().0; + assert_eq!(expectation, parsed) + } +} \ No newline at end of file diff --git a/src/refs/regex.rs b/src/refs/regex.rs new file mode 100644 index 00000000..64347c7b --- /dev/null +++ b/src/refs/regex.rs @@ -0,0 +1,163 @@ +use super::{ + super::{is_line_term, is_source_char, tokens::ident_part}, + RefToken as Token, +}; +use combine::{ + attempt, between, choice, error::ParseError, many, parser::char::char as c_char, + range::recognize, satisfy, Parser, Stream, +}; +/// Parse a regex literal starting after the first / +pub fn regex_tail<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(( + attempt(regex_body::<'a, I>()), + c_char('/'), + attempt(regex_flags::<'a, I>()), + )) + .map(|_| Token::RegEx) +} + +fn regex_flags<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: 
std::convert::From<&'a str>, +{ + recognize(many::, _>(ident_part())).map(|_| ()) +} +/// Parse the body portion of the regex literal +fn regex_body<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize((regex_first_char(), many::, _>(regex_char()))).map(|_| ()) +} + +fn regex_first_char<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(regex_body_first_source_char()), + attempt(regular_expression_backslash_sequence()), + attempt(regular_expression_class()), + )) + .map(|_| ()) +} + +fn regex_body_first_source_char<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(satisfy(|c: char| { + is_source_char(c) && !is_line_term(c) && c != '*' && c != '\\' && c != '/' && c != '[' + })) + .map(|_| ()) +} + +fn regex_body_source_char<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(satisfy(|c: char| { + is_source_char(c) && !is_line_term(c) && c != '\\' && c != '/' && c != '[' + })) + .map(|_| ()) +} + +fn regex_char<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt(regex_body_source_char()), + attempt(regular_expression_backslash_sequence()), + attempt(regular_expression_class()), + )) + .map(|_| ()) +} + +fn regular_expression_class<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: 
std::convert::From<&'a str>, +{ + recognize(between( + c_char('['), + c_char(']'), + many::, _>(regular_expression_class_char()), + )) + .map(|_| ()) +} + +fn regular_expression_class_char<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + choice(( + attempt( + recognize(satisfy(|c: char| { + is_source_char(c) && !is_line_term(c) && c != '\u{005C}' && c != '\u{005D}' + })) + .map(|_| ()), + ), + attempt(regular_expression_backslash_sequence()), + )) + .map(|_| ()) +} +pub(crate) fn source_char_not_line_term<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(satisfy(|c: char| is_source_char(c) && !is_line_term(c))).map(|_| ()) +} + +fn regular_expression_backslash_sequence<'a, I>() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + ::Range: std::convert::From<&'a str>, +{ + recognize(c_char('\\').and(source_char_not_line_term())).map(|_| ()) +} diff --git a/src/refs/strings.rs b/src/refs/strings.rs new file mode 100644 index 00000000..bdecfc31 --- /dev/null +++ b/src/refs/strings.rs @@ -0,0 +1,296 @@ +use combine::{ + attempt, between, choice, + error::ParseError, + many, not_followed_by, + parser::{ + char::{char as c_char, spaces, string}, + item::satisfy, + range::recognize, + }, + Parser, RangeStream, Stream, +}; + +use super::super::{is_line_term, is_source_char}; +use refs::tokens::{RefToken as Token, StringLit, Template}; + +pub fn literal<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice((attempt(single_quote()), attempt(double_quote()))).map(Token::String) +} + +fn single_quote() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + 
::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(between( + c_char('\''), + c_char('\''), + many::(single_quoted_content()), + )) + .map(|_| StringLit::Single) +} + +fn single_quoted_content() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(escaped_single_quote()), + attempt(escaped_escape()), + attempt(string_continuation()), + attempt(recognize(satisfy(|c: char| c != '\'' && !is_line_term(c)))), + )) +} + +fn escaped_single_quote() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(r#"\'"#)) +} + +fn escaped_escape() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string(r#"\\"#)) +} + +fn string_continuation() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize((c_char('\\'), line_terminator_sequence())).skip(spaces()) +} + +fn double_quote() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(between( + c_char('"'), + c_char('"'), + many::(double_quoted_content()), + )) + .map(|_| StringLit::Double) +} + +fn double_quoted_content() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + attempt(escaped_double_quote()), + attempt(escaped_escape()), + attempt(string_continuation()), + attempt(recognize(satisfy(|c: char| c != '"' && !is_line_term(c)))), + )) +} + +fn escaped_double_quote() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: 
ParseError, +{ + recognize(string(r#"\""#)) +} + +fn line_terminator() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(satisfy(is_line_term)) +} + +pub(crate) fn line_terminator_sequence() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(choice(( + attempt(recognize(string("\r\n"))), + attempt(line_terminator()), + ))) +} + +pub fn template_start() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + choice((attempt(no_sub_template()), attempt(template_head()))).map(Token::Template) +} + +pub fn template_continuation() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + choice((attempt(template_middle()), attempt(template_tail()))).map(Token::Template) +} + +fn no_sub_template() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(between( + c_char('`'), + c_char('`'), + many::(template_char()), + )) + .map(|_| Template::NoSub) +} + +fn template_head() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(between( + string("`"), + string("${"), + many::(template_char()), + )) + .map(|_| Template::Head) +} + +fn template_middle() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize((many::(template_char()), string("${"))).map(|_| 
Template::Body) +} + +fn template_tail() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + (many::(template_char()), recognize(c_char('`'))).map(|_| Template::Tail) +} + +fn template_char() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + choice(( + attempt(single_dollar_sign()), + attempt(escaped_template_start()), + attempt(escaped_back_tick()), + attempt(solo_back_slash()), + attempt(template_char_catch_all()), + )) +} + +fn single_dollar_sign() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(c_char('$').skip(not_followed_by(c_char('{')))) +} + +fn escaped_template_start() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(string(r#"\${"#)) +} + +fn escaped_back_tick() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(string(r#"\`"#)) +} + +fn solo_back_slash() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(string(r#"\"#)) +} + +fn template_char_catch_all() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize(satisfy(|c: char| is_source_char(c) && c != '`' && c != '$')) +} diff --git a/src/refs/tokens.rs 
b/src/refs/tokens.rs new file mode 100644 index 00000000..ff91675d --- /dev/null +++ b/src/refs/tokens.rs @@ -0,0 +1,523 @@ +use combine::{ + attempt, choice, eof, + error::ParseError, + many, not_followed_by, + parser::char::{char as c_char, string}, + range::recognize, + Parser, RangeStream, Stream, +}; + +use keywords::Keyword; +use punct::Punct; +use refs::{ + comments::comment, + keywords::literal as keyword, + numbers::literal as number, + punct::punctuation, + strings::{literal as string_lit, template_start}, +}; +use unicode; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum StringLit { + Double, + Single, +} +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Comment { + SingleLine, + MultiLine, + Html, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Number { + Dec, + Bin, + Hex, + Oct, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Template { + NoSub, + Head, + Body, + Tail, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RefToken { + /// `true` of `false` + Boolean(bool), + /// The end of the file + EoF, + /// An identifier this will be either a variable name + /// or a function/method name + Ident, + /// A word that has been reserved to not be used as an identifier + Keyword(Keyword), + /// A `null` literal value + Null, + /// A number, this includes integers (`1`), decimals (`0.1`), + /// hex (`0x8f`), binary (`0b010011010`), and octal (`0o273`) + Numeric(Number), + /// A punctuation mark, this includes all mathematical operators + /// logical operators and general syntax punctuation + Punct(Punct), + /// A string literal, either double or single quoted, the associated + /// value will be the unquoted string + String(StringLit), + /// A regular expression literal. 
+ /// ```js + /// let regex = /[a-zA-Z]+/g; + /// ``` + RegEx, + /// The string parts of a template string + /// ``` + /// # extern crate ress; + /// # use ress::{Scanner, Item, Token, Number, Template}; + /// # fn main() { + /// let js = "`Things and stuff times ${10} equals ${100000000}... i think`"; + /// let mut s = Scanner::new(js); + /// assert_eq!(s.next().unwrap().token, + /// Token::template_head("Things and stuff times ")); + /// assert_eq!(s.next().unwrap().token, + /// Token::numeric("10")); + /// assert_eq!(s.next().unwrap().token, + /// Token::template_middle(" equals ")); + /// assert_eq!(s.next().unwrap().token, + /// Token::numeric("100000000")); + /// assert_eq!(s.next().unwrap().token, + /// Token::template_tail("... i think")); + /// # } + /// ``` + Template(Template), + /// A comment, the associated value will contain the raw comment + /// This will capture both inline comments `// I am an inline comment` + /// and multi-line comments + /// ```js + /// /*multi lines + /// * comments + /// */ + /// ``` + Comment(Comment), +} + +impl RefToken { + pub fn is_bool(&self) -> bool { + match self { + RefToken::Boolean(_) => true, + _ => false, + } + } + pub fn is_null(&self) -> bool { + self == &RefToken::Null + } + pub fn is_comment(&self) -> bool { + match self { + RefToken::Comment(_) => true, + _ => false, + } + } + + pub fn is_punct(&self) -> bool { + match self { + RefToken::Punct(_) => true, + _ => false, + } + } + + pub fn matches_punct(&self, p: &Punct) -> bool { + match self { + RefToken::Punct(o) => p == o, + _ => false, + } + } + + pub fn is_keyword(&self) -> bool { + match self { + RefToken::Keyword(_) => true, + _ => false, + } + } + + pub fn is_ident(&self) -> bool { + match self { + RefToken::Ident => true, + _ => false, + } + } + + pub fn matches_keyword(&self, k: &Keyword) -> bool { + match self { + RefToken::Keyword(l) => k == l, + _ => false, + } + } + + pub fn is_template_tail(&self) -> bool { + match self { + RefToken::Template(t) 
=> t == &Template::Tail || t == &Template::NoSub, + _ => false, + } + } + pub fn is_template_head(&self) -> bool { + match self { + RefToken::Template(t) => t == &Template::Head || t == &Template::NoSub, + _ => false, + } + } + pub fn is_template_body(&self) -> bool { + match self { + RefToken::Template(t) => t == &Template::Body, + _ => false, + } + } + pub fn is_literal(&self) -> bool { + match self { + RefToken::String(_) + | RefToken::Numeric(_) + | RefToken::Null + | RefToken::RegEx + | RefToken::Boolean(_) + | RefToken::Template(_) => true, + _ => false, + } + } + + pub fn is_string(&self) -> bool { + match self { + RefToken::String(_) => true, + _ => false, + } + } + + pub fn is_regex(&self) -> bool { + match self { + RefToken::RegEx => true, + _ => false + } + } + + pub fn is_number(&self) -> bool { + match self { + RefToken::Numeric(_) => true, + _ => false, + } + } + + pub fn is_eof(&self) -> bool { + self == &RefToken::EoF + } + + pub fn is_strict_reserved(&self) -> bool { + match self { + RefToken::Keyword(ref k) => k.is_strict_reserved(), + _ => false, + } + } + pub fn is_restricted(&self) -> bool { + match self { + RefToken::Keyword(ref k) => k.is_reserved(), + _ => false, + } + } + pub fn is_future_reserved(&self) -> bool { + match self { + RefToken::Keyword(ref k) => k.is_future_reserved(), + _ => false, + } + } +} + +pub fn token<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice((token_not_eof(), end_of_input())) +} + +pub(crate) fn token_not_eof<'a, I>() -> impl Parser +where + I: RangeStream, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + choice(( + comment(), + boolean_literal(), + attempt(keyword()), + attempt(ident()), + attempt(null_literal()), + attempt(number()), + attempt(string_lit()), + attempt(punctuation()), + attempt(template_start()), + )) +} + +pub fn boolean_literal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: 
combine::stream::Range, + I::Error: ParseError, +{ + choice((attempt(true_literal()), attempt(false_literal()))).map(RefToken::Boolean) +} + +fn true_literal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("true").skip(not_followed_by(super::super::tokens::raw_ident_part()))) + .map(|_| true) +} + +pub(crate) fn raw_ident_part() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(choice(( + super::super::unicode::id_continue(), + c_char('$'), + c_char('\\').skip(c_char('u')), + c_char('\u{200C}'), + c_char('\u{200D}'), + ))) +} + +fn false_literal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("false").skip(not_followed_by(super::super::tokens::raw_ident_part()))) + .map(|_| false) +} + +pub fn null_literal() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(string("null").skip(not_followed_by(super::super::tokens::raw_ident_part()))) + .map(|_| RefToken::Null) +} + +pub fn ident() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, + std::string::String: std::iter::Extend<::Range>, +{ + recognize((ident_start(), many::(ident_part()))).map(|_| RefToken::Ident) +} + +pub(crate) fn ident_part() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(choice((attempt(ident_start()), attempt(raw_ident_part())))) +} + +fn ident_start() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(choice(( + attempt(unicode::id_start().map(|c: char| c.to_string())), + 
attempt(c_char('$').map(|c: char| c.to_string())), + attempt(c_char('_').map(|c: char| c.to_string())), + attempt(unicode::char_literal()), + ))) +} + +pub(crate) fn end_of_input() -> impl Parser +where + I: Stream, + I: combine::RangeStreamOnce, + ::Range: combine::stream::Range, + I::Error: ParseError, +{ + recognize(eof()).map(|_| RefToken::EoF) +} + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn ref_tokens() { + static TOKENS: &[&str] = &[ + "//this is a comment", + "/*this is a\nmulti-line comment*/", + "", + " with a trailer", + "implements", + "interface", + "package", + "private", + "protected", + "public", + "static", + "yield", + "let", + "enum", + "export", + "import", + "super", + "break", + "case", + "catch", + "continue", + "debugger", + "default", + "delete", + "do", + "else", + "finally", + "for", + "function", + "if", + "instanceof", + "in", + "new", + "return", + "switch", + "this", + "throw", + "try", + "typeof", + "var", + "void", + "while", + "with", + "0", + "00", + "1234567890", + "01234567", + "0.", + "0.00", + "10.00", + ".0", + ".0", + "0e0", + "0E0", + "0.e0", + "0.00e+0", + ".00e-0", + "0x0", + "0X0", + "0x0123456789abcdefABCDEF", + "0b0", + "0b0100101", + "0o0", + "0o777", + "2e308", + "{", + "}", + "(", + ")", + ".", + ";", + ",", + "[", + "]", + ":", + "?", + "~", + ">", + "<", + "=", + "!", + "+", + "-", + "/", + "*", + "%", + "&", + "|", + "^", + ">>>=", + "...", + "===", + "!==", + ">>>", + "<<=", + ">>=", + "**=", + "&&", + "||", + "==", + "!=", + "+=", + "-=", + "*=", + "/=", + "++", + "--", + "<<", + ">>", + "&=", + "|=", + "^=", + "%=", + "<=", + ">=", + "=>", + "**", + "$", + "_", + "\\u0078", + "x$", + "x_", + "x\\u0030", + "xa", + "x0", + "x0a", + "x0123456789", + "qwertyuiopasdfghjklzxcvbnm", + "QWERTYUIOPASDFGHJKLZXCVBNM", + "œ一", + "ǻ둘", + "ɤ〩", + "φ", + "fiⅷ", + "ユニコード", + "x\u{200c}\u{200d}", + "true", + "false", + "null", + "`things and stuff times ${", + "`things and stuff`", + "`a\\${b`", + 
"`\\0\\n\\x0A\\u000A\\u{A}${", + ]; + for t in TOKENS { + let s = t.to_string(); + let s = s.as_str(); + let _r = token().easy_parse(s).unwrap(); + } + } +} diff --git a/src/regex.rs b/src/regex.rs index cb3d8321..eeccfc19 100644 --- a/src/regex.rs +++ b/src/regex.rs @@ -1,7 +1,7 @@ use super::{is_line_term, is_source_char}; use combine::{ - between, choice, error::ParseError, many, parser::char::char as c_char, satisfy, attempt, Parser, - Stream, + attempt, between, choice, error::ParseError, many, parser::char::char as c_char, satisfy, + Parser, Stream, }; use tokens::{ident_part, Token}; @@ -40,7 +40,7 @@ impl ToString for RegEx { } } /// Parse a regex literal starting after the first / -pub(crate) fn regex_tail() -> impl Parser +pub fn regex_tail() -> impl Parser where I: Stream, I::Error: ParseError, @@ -70,7 +70,8 @@ where attempt(regex_body_first_source_char()), attempt(regular_expression_backslash_sequence()), attempt(regular_expression_class()), - )).map(|c: String| c) + )) + .map(|c: String| c) } fn regex_body_first_source_char() -> impl Parser @@ -80,7 +81,8 @@ where { satisfy(|c: char| { is_source_char(c) && !is_line_term(c) && c != '*' && c != '\\' && c != '/' && c != '[' - }).map(|c: char| c.to_string()) + }) + .map(|c: char| c.to_string()) } fn regex_body_source_char() -> impl Parser @@ -101,7 +103,8 @@ where attempt(regex_body_source_char()), attempt(regular_expression_backslash_sequence()), attempt(regular_expression_class()), - )).map(|s: String| s) + )) + .map(|s: String| s) } fn regular_expression_class() -> impl Parser @@ -113,7 +116,8 @@ where c_char('['), c_char(']'), many(regular_expression_class_char()), - ).map(|s: String| format!("[{}]", s)) + ) + .map(|s: String| format!("[{}]", s)) } fn regular_expression_class_char() -> impl Parser @@ -122,11 +126,15 @@ where I::Error: ParseError, { choice(( - attempt(satisfy(|c: char| { - is_source_char(c) && !is_line_term(c) && c != '\u{005C}' && c != '\u{005D}' - }).map(|c: char| c.to_string())), + 
attempt( + satisfy(|c: char| { + is_source_char(c) && !is_line_term(c) && c != '\u{005C}' && c != '\u{005D}' + }) + .map(|c: char| c.to_string()), + ), attempt(regular_expression_backslash_sequence()), - )).map(|s: String| s) + )) + .map(|s: String| s) } pub(crate) fn source_char_not_line_term() -> impl Parser where diff --git a/src/strings.rs b/src/strings.rs index c4692319..54abdfc1 100644 --- a/src/strings.rs +++ b/src/strings.rs @@ -1,16 +1,17 @@ use combine::{ - between, choice, + attempt, between, choice, error::ParseError, many, not_followed_by, parser::{ char::{char as c_char, spaces, string}, item::satisfy, }, - attempt, Parser, Stream, + Parser, Stream, }; use super::{is_line_term, is_source_char}; use tokens::Token; + #[derive(Debug, PartialEq, Clone)] pub enum StringLit { Single(String), @@ -111,7 +112,7 @@ impl ToString for Template { } } -pub(crate) fn literal() -> impl Parser +pub fn literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -137,7 +138,8 @@ where attempt(string(r#"\\"#).map(|s: &str| s.to_string())), attempt(string_continuation()), attempt(satisfy(|c: char| c != '\'' && !is_line_term(c)).map(|c: char| c.to_string())), - )).map(|s: String| s) + )) + .map(|s: String| s) } fn string_continuation() -> impl Parser @@ -168,7 +170,8 @@ where attempt(string(r#"\\"#).map(|s: &str| s.to_string())), attempt(string_continuation()), attempt(satisfy(|c: char| c != '"' && !is_line_term(c)).map(|c: char| c.to_string())), - )).map(|c: String| c) + )) + .map(|c: String| c) } fn line_terminator() -> impl Parser @@ -187,10 +190,11 @@ where choice(( attempt(string("\r\n").map(|s: &str| s.to_string())), attempt(line_terminator().map(|c: char| c.to_string())), - )).map(|s: String| s) + )) + .map(|s: String| s) } -pub(crate) fn template_start() -> impl Parser +pub fn template_start() -> impl Parser where I: Stream, I::Error: ParseError, @@ -198,7 +202,7 @@ where choice((attempt(no_sub_template()), attempt(template_head()))).map(Token::Template) } 
-pub(crate) fn template_continuation() -> impl Parser +pub fn template_continuation() -> impl Parser where I: Stream, I::Error: ParseError, @@ -244,15 +248,20 @@ where I::Error: ParseError, { choice(( - attempt(c_char('$') - .skip(not_followed_by(c_char('{'))) - .map(|c: char| c.to_string())), + attempt( + c_char('$') + .skip(not_followed_by(c_char('{'))) + .map(|c: char| c.to_string()), + ), attempt(string(r#"\${"#).map(|s: &str| s.to_string())), attempt(string(r#"\`"#).map(|s: &str| s.to_string())), attempt(string(r#"\"#).map(|s: &str| s.to_string())), - attempt(satisfy(|c: char| is_source_char(c) && c != '`' && c != '$') - .map(|c: char| c.to_string())), - )).map(|s: String| s) + attempt( + satisfy(|c: char| is_source_char(c) && c != '`' && c != '$') + .map(|c: char| c.to_string()), + ), + )) + .map(|s: String| s) } #[cfg(test)] @@ -296,6 +305,13 @@ mod test { assert_eq!(e_r, (Token::no_sub_template("things and stuff"), "")) } + #[test] + fn template_no_sub_escaped() { + let empty = r#"`a\${b`"#; + let e_r = token().easy_parse(empty).unwrap(); + assert_eq!(e_r, (Token::no_sub_template(r#"a\${b"#), "")); + } + #[test] fn template_head() { let h = "`things and stuff times ${"; @@ -303,6 +319,13 @@ mod test { assert_eq!(r, (Token::template_head("things and stuff times "), "")); } + #[test] + fn template_head_unicode_escapes() { + let h = r#"`\0\n\x0A\u000A\u{A}${"#; + let r = token().easy_parse(h).unwrap(); + assert_eq!(r, (Token::template_head(r#"\0\n\x0A\u000A\u{A}"#), "")); + } + #[test] fn template_middle() { let m = " and places and people ${"; diff --git a/src/tokens.rs b/src/tokens.rs index de6777b8..2814092f 100644 --- a/src/tokens.rs +++ b/src/tokens.rs @@ -1,11 +1,11 @@ use std::ops::Deref; use combine::{ - choice, eof, + attempt, choice, eof, error::ParseError, many, not_followed_by, parser::char::{char as c_char, string}, - attempt, Parser, Stream, + Parser, Stream, }; use comments; @@ -34,12 +34,12 @@ impl Item { impl Deref for Item { type Target = 
Token; - fn deref<'a>(&'a self) -> &'a Self::Target { + fn deref(&self) -> &Self::Target { &self.token } } -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, Copy)] /// A location in the original source text pub struct Span { pub start: usize, @@ -119,7 +119,7 @@ pub enum BooleanLiteral { } impl BooleanLiteral { /// Test if this instance represents `true` - pub fn is_true(&self) -> bool { + pub fn is_true(self) -> bool { match self { BooleanLiteral::True => true, _ => false, @@ -475,7 +475,7 @@ impl Token { pub fn is_template_no_sub(&self) -> bool { match self { Token::Template(ref s) => s.is_no_sub(), - _ => false + _ => false, } } pub fn is_template_head(&self) -> bool { @@ -623,7 +623,7 @@ impl ToString for Token { } } } -parser!{ +parser! { pub fn token[I]()(I) -> Token where [I: Stream] { @@ -646,10 +646,10 @@ where attempt(strings::literal()), attempt(punct::punctuation()), attempt(strings::template_start()), - )).map(|t| t) + )) } -pub(crate) fn boolean_literal() -> impl Parser +pub fn boolean_literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -700,7 +700,7 @@ where eof().map(|_| Token::EoF) } -pub(crate) fn ident() -> impl Parser +pub fn ident() -> impl Parser where I: Stream, I::Error: ParseError, @@ -709,7 +709,7 @@ where .map(|(start, body): (String, String)| Token::Ident(Ident(start + &body))) } -pub(crate) fn null_literal() -> impl Parser +pub fn null_literal() -> impl Parser where I: Stream, I::Error: ParseError, @@ -729,7 +729,8 @@ where attempt(c_char('$').map(|c: char| c.to_string())), attempt(c_char('_').map(|c: char| c.to_string())), attempt(unicode::char_literal()), - )).map(|s: String| s) + )) + .map(|s: String| s) } pub(crate) fn ident_part() -> impl Parser diff --git a/src/unicode.rs b/src/unicode.rs index 7ae6df0b..28e264e8 100644 --- a/src/unicode.rs +++ b/src/unicode.rs @@ -24,7 +24,7 @@ where c_char('{'), many(hex_digit()).then_partial(|s: &mut String| { if let Ok(num) = u32::from_str_radix(&s, 16) { - 
if num <= 1114111 { + if num <= 1_114_111 { value(s.to_owned()).left() } else { unexpected_any("unicode code point must be <= 1114111").right() diff --git a/tests/ecma262/main.rs b/tests/ecma262/main.rs index 51bc05c9..efb04bdf 100644 --- a/tests/ecma262/main.rs +++ b/tests/ecma262/main.rs @@ -15,6 +15,13 @@ fn es5() { let js = get_js(EsVersion::Es5); run_test(&js); } +#[test] +fn ref_es5() { + println!("testing es5"); + ensure_logging(); + let js = get_js(EsVersion::Es5); + run_ref_test(&js); +} #[test] fn es2015_script() { @@ -23,14 +30,28 @@ fn es2015_script() { let js = get_js(EsVersion::Es2015Script); run_test(&js); } +#[test] +fn ref_es2015_script() { + ensure_logging(); + debug!("testing es2015 script"); + let js = get_js(EsVersion::Es2015Script); + run_ref_test(&js); +} #[test] fn es2015_module() { - println!("testing es2015 module"); ensure_logging(); + debug!("testing es2015 module"); let js = get_js(EsVersion::Es2015Module); run_test(&js); } +#[test] +fn ref_es2015_module() { + ensure_logging(); + debug!("testing es2015 module"); + let js = get_js(EsVersion::Es2015Module); + run_ref_test(&js); +} fn run_test(js: &str) { let mut s = Scanner::new(js); @@ -48,6 +69,15 @@ fn run_test(js: &str) { } } +fn run_ref_test(js: &str) { + let mut s = ress::refs::RefScanner::new(js); + let mut i = 0; + while let Some(item) = s.next() { + debug!("{}, {:?} {:?}", i, item.token, s.string_for(&item.span)); + i += 1; + } +} + fn ensure_logging() { let _ = pretty_env_logger::try_init(); } diff --git a/tests/moz_central/main.rs b/tests/moz_central/main.rs new file mode 100644 index 00000000..38d440c9 --- /dev/null +++ b/tests/moz_central/main.rs @@ -0,0 +1,69 @@ +#![cfg(all(test, feature = "moz_central"))] +extern crate flate2; +extern crate ress; +extern crate tar; + +use flate2::read::GzDecoder; +use std::path::{Path, PathBuf}; +use std::fs::read_to_string; +use ress::*; + +#[test] +fn moz_central() { + let moz_central_path = Path::new("./moz-central"); + if 
!moz_central_path.exists() { + get_moz_central_test_files(&moz_central_path); + } + walk(&moz_central_path); +} + +fn get_moz_central_test_files(path: &Path) { + let mut response = reqwest::get("https://hg.mozilla.org/mozilla-central/archive/tip.tar.gz/js/src/jit-test/tests/") + .expect("Failed to get zip of moz-central"); + let mut buf = Vec::new(); + response.copy_to(&mut buf) + .expect("failed to copy to BzDecoder"); + let gz = GzDecoder::new(buf.as_slice()); + let mut t = tar::Archive::new(gz); + t.unpack(path).expect("Failed to unpack gz"); +} + +fn walk(path: &Path) { + let files: Vec = path.read_dir().unwrap() + .map(|e| e.unwrap().path()).collect(); + files.iter().for_each(|path| { + if path.is_file() { + if let Some(ext) = path.extension() { + if ext == "js" { + if path.ends_with("gc/bug-1459860.js") + || path.ends_with("basic/testBug756918.js") + || path.ends_with("basic/bug738841.js") + || path.ends_with("ion/bug1331405.js") + || path.ends_with("basic/testThatGenExpsActuallyDecompile.js") + || path.ends_with("jaeger/bug672122.js") + || path.ends_with("gc/bug-924690.js") + || path.ends_with("auto-regress/bug732719.js") + || path.ends_with("auto-regress/bug740509.js") + || path.ends_with("auto-regress/bug521279.js") + || path.ends_with("auto-regress/bug701248.js") + || path.ends_with("auto-regress/bug1390082-1.js") + || path.ends_with("auto-regress/bug680797.js") + || path.ends_with("auto-regress/bug521163.js") + || path.ends_with("auto-regress/bug1448582-5.js") + || path.ends_with("tests/backup-point-bug1315634.js") + || path.ends_with("auto-regress/bug650574.js") + || path.ends_with("baseline/setcall.js") { + return; + } + let js = read_to_string(&path).unwrap(); + for _ in refs::RefScanner::new(js.as_str()) { + + } + + } + } + } else { + walk(&path) + } + }); +} \ No newline at end of file