From 90915a2214652646031ac287fb2a6a900f0bd461 Mon Sep 17 00:00:00 2001 From: Alex Ozdemir Date: Thu, 31 Oct 2019 11:56:24 -0700 Subject: [PATCH] A `std::io::Read`-based parser. Also, ran `cargo format`. (#3) * Run `cargo fmt` * Change parse internals to bytes rather than chars This change is straightforward because the DIMACS format is ASCII-only. * Added an std::io::Read-based parser Panics on IO errors Why? Because the alternatives are: * `collect` the first IO error -- requiring us to pull the whole string into memory * catch those IO errors deep in the lexer, and bubble them up as a special kind of `ParseError`. Since we'll generally be reading from files, read errors won't typically happen, so I figure it's better to just panic, since a user wouldn't want to recover anyway. * Add tests for read_dimacs --- src/errors.rs | 106 ++++--- src/items.rs | 267 ++++++++-------- src/lexer.rs | 841 +++++++++++++++++++++++++++----------------------- src/lib.rs | 32 +- src/parser.rs | 662 +++++++++++++++++++++++---------------- 5 files changed, 1048 insertions(+), 860 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index e12cbdc..fe2e39f 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -6,84 +6,88 @@ /// to debug their input files formats. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Loc { - line: u64, - col : u64 + line: u64, + col: u64, } impl Loc { - /// Creates a new location with a given line and column. - pub fn new(line: u64, col: u64) -> Loc { - Loc{ line: line, col: col } - } - - /// Bumps the line of this location, resetting its column. - pub fn bump_line(&mut self) { - self.line += 1; - self.col = 0; - } - - /// Bumps the column of this location. - pub fn bump_col(&mut self) { - self.col += 1; - } + /// Creates a new location with a given line and column. + pub fn new(line: u64, col: u64) -> Loc { + Loc { + line: line, + col: col, + } + } + + /// Bumps the line of this location, resetting its column. 
+ pub fn bump_line(&mut self) { + self.line += 1; + self.col = 0; + } + + /// Bumps the column of this location. + pub fn bump_col(&mut self) { + self.col += 1; + } } /// Different kinds of errors that may occure while parsing. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum ErrorKind { - /// When parsing an invalid character at the start of a token. - InvalidTokenStart, + /// When parsing an invalid character at the start of a token. + InvalidTokenStart, - /// When parsing an unknown keyword (e.g. "foo"). - UnknownKeyword, + /// When parsing an unknown keyword (e.g. "foo"). + UnknownKeyword, - /// When lexing an unexpected character. - UnexpectedChar, + /// When lexing an unexpected character. + UnexpectedChar, - /// When parsing an unexpected token. - UnexpectedToken, + /// When parsing an unexpected token. + UnexpectedToken, - /// When detecting an unexpected end of file. - UnexpectedEndOfFile, + /// When detecting an unexpected end of file. + UnexpectedEndOfFile, - /// When tried to parse an empty string. - EmptyTokenStream, + /// When tried to parse an empty string. + EmptyTokenStream, - /// When parsing an unknown SAT extension. - InvalidSatExtension, + /// When parsing an unknown SAT extension. + InvalidSatExtension, - /// When the parser is not at the end of file when finished parsing. - NotParsedToEnd, + /// When the parser is not at the end of file when finished parsing. + NotParsedToEnd, - /// When a natural number was expected but not found. - ExpectedNat, + /// When a natural number was expected but not found. + ExpectedNat, - /// When a literal was expected but not found. - ExpectedLit + /// When a literal was expected but not found. 
+ ExpectedLit, // IllegalXorExtensionUsed, // enhanced check + // IllegalEqExtensionUsed, // enhanced check - // IllegalXorExtensionUsed, // enhanced check - // IllegalEqExtensionUsed, // enhanced check - - // TooManyVariables, // enhanced check - // TooManyClauses, // enhanced check - // SelfContradictingClause, // enhanced check + // TooManyVariables, // enhanced check + // TooManyClauses, // enhanced check + // SelfContradictingClause, // enhanced check } /// Represents an error that occured while parsing. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct ParseError { - /// The source location (line + column) of the error. - pub loc : Loc, + /// The source location (line + column) of the error. + pub loc: Loc, - /// The kind of the error that occured. - pub kind: ErrorKind, + /// The kind of the error that occured. + pub kind: ErrorKind, } impl ParseError { - /// Creates a new parser error at the given source location with the given error kind. - pub fn new(loc: Loc, kind: ErrorKind) -> Self { - ParseError { loc: loc, kind: kind } - } + /// Creates a new parser error at the given source location with the given error kind. + pub fn new(loc: Loc, kind: ErrorKind) -> Self { + ParseError { + loc: loc, + kind: kind, + } + } } /// The result type used within this crate while parsing. diff --git a/src/items.rs b/src/items.rs index c25854b..84f39cc 100644 --- a/src/items.rs +++ b/src/items.rs @@ -6,18 +6,20 @@ pub struct Var(pub u64); impl Var { - /// Converts a variable into its representative `u64` value. - pub fn to_u64(self) -> u64 { self.0 } + /// Converts a variable into its representative `u64` value. + pub fn to_u64(self) -> u64 { + self.0 + } } /// Represents the sign of a literal. #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub enum Sign { - /// Positive sign. - Pos, + /// Positive sign. + Pos, - /// Negative sign. - Neg + /// Negative sign. + Neg, } /// Represents a literal within clauses of formulas of a SAT instance. 
@@ -25,51 +27,57 @@ pub enum Sign { pub struct Lit(i64); impl Lit { - /// Returns the underlying `i64` representant of this literal. - pub fn from_i64(val: i64) -> Lit { Lit(val) } - - /// Returns the associated variable for this literal. - pub fn var(self) -> Var { Var(self.0.abs() as u64) } - - /// Returns the inner `i64` value. - pub fn to_i64(self) -> i64 { self.0 } - - /// Returns the sign of this literal. - pub fn sign(self) -> Sign { - match self.0 >= 0 { - true => Sign::Pos, - _ => Sign::Neg - } - } + /// Returns the underlying `i64` representant of this literal. + pub fn from_i64(val: i64) -> Lit { + Lit(val) + } + + /// Returns the associated variable for this literal. + pub fn var(self) -> Var { + Var(self.0.abs() as u64) + } + + /// Returns the inner `i64` value. + pub fn to_i64(self) -> i64 { + self.0 + } + + /// Returns the sign of this literal. + pub fn sign(self) -> Sign { + match self.0 >= 0 { + true => Sign::Pos, + _ => Sign::Neg, + } + } } /// Represents a clause instance within a `.cnf` file. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Clause { - lits: Box<[Lit]> + lits: Box<[Lit]>, } impl Clause { - /// Creates a new clause from a vector of literals. - pub fn from_vec(lits: Vec) -> Clause { - Clause{ - lits: lits.into_boxed_slice() - } - } - - /// Returns the number of literals of this clause. - pub fn len(&self) -> usize { - self.lits.len() - } - - /// Returns a slice over the literals of this clause. - pub fn lits(&self) -> &[Lit] { - &self.lits - } + /// Creates a new clause from a vector of literals. + pub fn from_vec(lits: Vec) -> Clause { + Clause { + lits: lits.into_boxed_slice(), + } + } + + /// Returns the number of literals of this clause. + pub fn len(&self) -> usize { + self.lits.len() + } + + /// Returns a slice over the literals of this clause. + pub fn lits(&self) -> &[Lit] { + &self.lits + } } /// An indirection to a `Formula` via `Box`. 
-pub type FormulaBox = Box; +pub type FormulaBox = Box; /// An immutable list of `Formula`s. pub type FormulaList = Box<[Formula]>; @@ -77,113 +85,120 @@ pub type FormulaList = Box<[Formula]>; /// Represents the structure of formulas of `.sat` files. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Formula { - /// A single literal. This is the leaf node type of sat formulas. - Lit(Lit), + /// A single literal. This is the leaf node type of sat formulas. + Lit(Lit), - /// Represents `(f)` if `f` is a valid formula. - Paren(FormulaBox), + /// Represents `(f)` if `f` is a valid formula. + Paren(FormulaBox), - /// Represents `-(f)` if `f` is a valid formula. - /// This negates the result of the inner `f`. - Neg(FormulaBox), + /// Represents `-(f)` if `f` is a valid formula. + /// This negates the result of the inner `f`. + Neg(FormulaBox), - /// Represents `*(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. - /// The effect is a logical and of its inner formulas. - And(FormulaList), + /// Represents `*(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. + /// The effect is a logical and of its inner formulas. + And(FormulaList), - /// Represents `+(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. - /// The effect is a logical or of its inner formulas. - Or(FormulaList), + /// Represents `+(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. + /// The effect is a logical or of its inner formulas. + Or(FormulaList), - /// Represents `xor(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. - /// The effect is a logical xor of its inner formulas. - Xor(FormulaList), + /// Represents `xor(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. + /// The effect is a logical xor of its inner formulas. + Xor(FormulaList), - /// Represents `=(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. - /// The effect is a logical equals of its inner formulas. - Eq(FormulaList) + /// Represents `=(f_1 .. f_k)` if `f_1, .., f_k` are valid formulas. 
+ /// The effect is a logical equals of its inner formulas. + Eq(FormulaList), } impl Formula { - /// Creates a new literal leaf formula with the given literal. - pub fn lit(lit: Lit) -> Formula { - Formula::Lit(lit) - } - - /// Wraps the inner formula within parentheses. - pub fn paren(inner: Formula) -> Formula { - Formula::Paren(Box::new(inner)) - } - - /// Negates the inner formula. - pub fn neg(inner: Formula) -> Formula { - Formula::Neg(Box::new(inner)) - } - - /// Creates a logical and formula of all given formulas in `param`. - pub fn and(params: Vec) -> Formula { - Formula::And(params.into_boxed_slice()) - } - - /// Creates a logical or formula of all given formulas in `param`. - pub fn or(params: Vec) -> Formula { - Formula::Or(params.into_boxed_slice()) - } - - /// Creates a logical xor formula of all given formulas in `param`. - pub fn xor(params: Vec) -> Formula { - Formula::Xor(params.into_boxed_slice()) - } - - /// Creates a logical equality formula of all given formulas in `param`. - pub fn eq(params: Vec) -> Formula { - Formula::Eq(params.into_boxed_slice()) - } + /// Creates a new literal leaf formula with the given literal. + pub fn lit(lit: Lit) -> Formula { + Formula::Lit(lit) + } + + /// Wraps the inner formula within parentheses. + pub fn paren(inner: Formula) -> Formula { + Formula::Paren(Box::new(inner)) + } + + /// Negates the inner formula. + pub fn neg(inner: Formula) -> Formula { + Formula::Neg(Box::new(inner)) + } + + /// Creates a logical and formula of all given formulas in `param`. + pub fn and(params: Vec) -> Formula { + Formula::And(params.into_boxed_slice()) + } + + /// Creates a logical or formula of all given formulas in `param`. + pub fn or(params: Vec) -> Formula { + Formula::Or(params.into_boxed_slice()) + } + + /// Creates a logical xor formula of all given formulas in `param`. 
+ pub fn xor(params: Vec) -> Formula { + Formula::Xor(params.into_boxed_slice()) + } + + /// Creates a logical equality formula of all given formulas in `param`. + pub fn eq(params: Vec) -> Formula { + Formula::Eq(params.into_boxed_slice()) + } } /// Represents a SAT instance for `.cnf` or `.sat` files. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Instance { - /// A `.cnf` SAT instance with clauses. - Cnf{ - /// The number of unique variables used within this `.cnf` SAT instance. - num_vars: u64, - - /// The clauses within this `.cnf` SAT instance formula. - clauses: Box<[Clause]> - }, - - /// A `.sat` SAT instance with an underlying formula and extensions. - Sat{ - /// The number of unique variables used within this `.sat` SAT instance. - num_vars: u64, - - /// Extensions (e.g. `XOR` or `EQ`) being used in this SAT instance. - extensions: Extensions, - - /// The underlying formula of this SAT instance. - formula: Formula - } + /// A `.cnf` SAT instance with clauses. + Cnf { + /// The number of unique variables used within this `.cnf` SAT instance. + num_vars: u64, + + /// The clauses within this `.cnf` SAT instance formula. + clauses: Box<[Clause]>, + }, + + /// A `.sat` SAT instance with an underlying formula and extensions. + Sat { + /// The number of unique variables used within this `.sat` SAT instance. + num_vars: u64, + + /// Extensions (e.g. `XOR` or `EQ`) being used in this SAT instance. + extensions: Extensions, + + /// The underlying formula of this SAT instance. + formula: Formula, + }, } impl Instance { - /// Creates a new SAT instance for `.cnf` files with given clauses. - pub fn cnf(num_vars: u64, clauses: Vec) -> Instance { - Instance::Cnf{num_vars: num_vars, clauses: clauses.into_boxed_slice()} - } - - /// Creates a new SAT instance for `.sat` files with given extensions and an underlying formula. 
- pub fn sat(num_vars: u64, extensions: Extensions, formula: Formula) -> Instance { - Instance::Sat{num_vars: num_vars, extensions: extensions, formula: formula} - } + /// Creates a new SAT instance for `.cnf` files with given clauses. + pub fn cnf(num_vars: u64, clauses: Vec) -> Instance { + Instance::Cnf { + num_vars: num_vars, + clauses: clauses.into_boxed_slice(), + } + } + + /// Creates a new SAT instance for `.sat` files with given extensions and an underlying formula. + pub fn sat(num_vars: u64, extensions: Extensions, formula: Formula) -> Instance { + Instance::Sat { + num_vars: num_vars, + extensions: extensions, + formula: formula, + } + } } bitflags! { - /// Possible extensions for `.sat` file SAT instances. + /// Possible extensions for `.sat` file SAT instances. pub struct Extensions: u32 { - /// If no extensions are being used. - const NONE = 0b00000000; - /// If the XOR-Extension is being used to allow for `xor(..)` formulas. + /// If no extensions are being used. + const NONE = 0b00000000; + /// If the XOR-Extension is being used to allow for `xor(..)` formulas. const XOR = 0b00000001; /// If the EQ-Extension is being used to allow for `=(..)` formulas. const EQ = 0b00000010; diff --git a/src/lexer.rs b/src/lexer.rs index bf4f41d..08558a2 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -4,309 +4,321 @@ use crate::errors::ErrorKind::*; #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub struct Token { - pub loc : Loc, - pub kind: TokenKind + pub loc: Loc, + pub kind: TokenKind, } impl Token { - pub fn new(loc: Loc, kind: TokenKind) -> Token { - Token{ - loc : loc, - kind: kind - } - } + pub fn new(loc: Loc, kind: TokenKind) -> Token { + Token { + loc: loc, + kind: kind, + } + } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum TokenKind { - /// Represents an entire 'c foo bar\n' line - Comment, + /// Represents an entire 'c foo bar\n' line + Comment, - /// Represents a positive, non-zero integer value, e.g. 
42 - Nat(u64), + /// Represents a positive, non-zero integer value, e.g. 42 + Nat(u64), - /// Represents a zero integer value - Zero, + /// Represents a zero integer value + Zero, - /// Represents a '+' symbol, interpreted as logical or - Plus, + /// Represents a '+' symbol, interpreted as logical or + Plus, - /// Represents a '-' symbol, interpreted as logical negation for literals or formulas - Minus, // TODO! + /// Represents a '-' symbol, interpreted as logical negation for literals or formulas + Minus, // TODO! - /// Represents a '*' symbol, interpreted as logical and - Star, + /// Represents a '*' symbol, interpreted as logical and + Star, - /// Represents a '=' symbol, interpreted as logical equal - Eq, + /// Represents a '=' symbol, interpreted as logical equal + Eq, - /// Represents an opening parentheses '(' - Open, + /// Represents an opening parentheses '(' + Open, - /// Represents a closed parentheses ')' - Close, + /// Represents a closed parentheses ')' + Close, - /// Represents a known keyword, e.g. cnf, sat, sate, satex - Ident(Ident), + /// Represents a known keyword, e.g. cnf, sat, sate, satex + Ident(Ident), - /// Represents the end of a file - EndOfFile + /// Represents the end of a file + EndOfFile, } use self::TokenKind::*; impl TokenKind { - /// Returns `true` if this `TokenKind` is relevant for parsing purposes. - pub fn is_relevant(self) -> bool { - match self { - Comment => false, - _ => true - } - } + /// Returns `true` if this `TokenKind` is relevant for parsing purposes. + pub fn is_relevant(self) -> bool { + match self { + Comment => false, + _ => true, + } + } } #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum Ident { - /// Represents a 'p' keyword - Problem, + /// Represents a 'p' keyword + Problem, - /// Used in 'satx' or 'satex' extension formulas. - Xor, + /// Used in 'satx' or 'satex' extension formulas. + Xor, - /// Used as problem-kind parameter in problem lines to denote a CNF problem. 
- Cnf, + /// Used as problem-kind parameter in problem lines to denote a CNF problem. + Cnf, - /// Used as problem-kind parameter in problem lines to denote a SAT problem. - Sat, + /// Used as problem-kind parameter in problem lines to denote a SAT problem. + Sat, - /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Xor extension. - Satx, + /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Xor extension. + Satx, - /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Eq extension. - Sate, + /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Eq extension. + Sate, - /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Eq and Xor extensions. - Satex + /// Used as problem-kind parameter in problem lines to denote a CNF problem with the Eq and Xor extensions. + Satex, } use self::Ident::*; #[derive(Debug, Clone)] pub struct Lexer - where I: Iterator +where + I: Iterator, { - /// input iterator - input : I, + /// input iterator + input: I, - /// internal buffer to map to known keywords - buffer: String, + /// internal buffer to map to known keywords + buffer: Vec, - /// the current character that is being dispatched upon - peek : char, + /// the current byte that is being dispatched upon + peek: u8, - /// represents the `Loc` of the next iterated item - nloc : Loc, + /// represents the `Loc` of the next iterated item + nloc: Loc, - /// represents the current `Loc` within the stream - cloc : Loc + /// represents the current `Loc` within the stream + cloc: Loc, } impl Lexer - where I: Iterator +where + I: Iterator, { - pub fn from(input: I) -> Lexer { - let mut lex = Lexer{ - input : input, - buffer: String::new(), - peek : '\0', - nloc : Loc::new(1, 0), - cloc : Loc::new(1, 0) - }; - lex.bump(); - lex - } - - fn bump_opt(&mut self) -> Option { - if let Some(peeked) = self.input.next() { - self.peek = peeked; 
- if peeked == '\n' { - self.cloc.bump_line() - } - else { - self.cloc.bump_col() - } - Some(peeked) - } - else { - None - } - } - - fn bump(&mut self) -> char { - self.peek = self.bump_opt().unwrap_or('\0'); - self.peek - } - - fn mk_token(&self, kind: TokenKind) -> Token { - Token::new(self.nloc, kind) - } - - fn mk_error(&self, kind: ErrorKind) -> ParseError { - ParseError::new(self.nloc, kind) - } - - fn tok(&self, kind: TokenKind) -> Result { - Ok(self.mk_token(kind)) - } - - fn bump_tok(&mut self, kind: TokenKind) -> Result { - self.bump(); - self.tok(kind) - } - - fn err(&self, kind: ErrorKind) -> Result { - Err(self.mk_error(kind)) - } - - fn skip_line(&mut self) { - while self.peek != '\n' && self.peek != '\0' { - self.bump(); - } - } - - fn scan_comment(&mut self) -> Result { - self.skip_line(); - self.tok(Comment) - } - - fn unknown_keyword(&mut self) -> Result { - while self.bump().is_alphanumeric() {} - self.err(UnknownKeyword) - } - - fn scan_keyword(&mut self) -> Result { - self.buffer.clear(); - self.buffer.push(self.peek); - while self.bump().is_alphanumeric() { - if self.buffer.len() < 5 { - self.buffer.push(self.peek); - } - else { - return self.unknown_keyword(); - } - } - match self.buffer.as_str() { - "c" => self.scan_comment(), - "p" => self.tok(Ident(Problem)), - "cnf" => self.tok(Ident(Cnf)), - "sat" => self.tok(Ident(Sat)), - "sate" => self.tok(Ident(Sate)), - "satx" => self.tok(Ident(Satx)), - "satex" => self.tok(Ident(Satex)), - "xor" => self.tok(Ident(Xor)), - _ => self.err(UnknownKeyword) - } - } - - fn scan_nat(&mut self) -> Result { - let mut val = self.peek.to_digit(10) - .expect("expected a digit to base 10: (0...9)") as u64; - while let Some(parsed) = self.bump().to_digit(10) { - val *= 10; - val += parsed as u64; - } - self.tok(Nat(val)) - } - - fn skip_whitespace(&mut self) { - while self.peek.is_whitespace() { - self.bump(); - } - } - - fn update_nloc(&mut self) { - self.nloc = self.cloc; - } - - fn next_token(&mut self) -> 
Option> { - self.skip_whitespace(); - if self.peek == '\0' { return None; } - self.update_nloc(); - Some( - match self.peek { - 'A'..='Z' | - 'a'..='z' => self.scan_keyword(), - - '1'..='9' => self.scan_nat(), - - '0' => self.bump_tok(Zero), - '(' => self.bump_tok(Open), - ')' => self.bump_tok(Close), - '+' => self.bump_tok(Plus), - '*' => self.bump_tok(Star), - '=' => self.bump_tok(Eq), - '-' => self.bump_tok(Minus), - - _ => { - self.bump(); - self.err(InvalidTokenStart) - } - } - ) - } + pub fn from(input: I) -> Lexer { + let mut lex = Lexer { + input: input, + buffer: Vec::new(), + peek: b'\0', + nloc: Loc::new(1, 0), + cloc: Loc::new(1, 0), + }; + lex.bump(); + lex + } + + fn bump_opt(&mut self) -> Option { + if let Some(peeked) = self.input.next() { + self.peek = peeked; + if peeked == b'\n' { + self.cloc.bump_line() + } else { + self.cloc.bump_col() + } + Some(peeked) + } else { + None + } + } + + fn bump(&mut self) -> u8 { + self.peek = self.bump_opt().unwrap_or(b'\0'); + self.peek + } + + fn mk_token(&self, kind: TokenKind) -> Token { + Token::new(self.nloc, kind) + } + + fn mk_error(&self, kind: ErrorKind) -> ParseError { + ParseError::new(self.nloc, kind) + } + + fn tok(&self, kind: TokenKind) -> Result { + Ok(self.mk_token(kind)) + } + + fn bump_tok(&mut self, kind: TokenKind) -> Result { + self.bump(); + self.tok(kind) + } + + fn err(&self, kind: ErrorKind) -> Result { + Err(self.mk_error(kind)) + } + + fn skip_line(&mut self) { + while self.peek != b'\n' && self.peek != b'\0' { + self.bump(); + } + } + + fn scan_comment(&mut self) -> Result { + self.skip_line(); + self.tok(Comment) + } + + fn unknown_keyword(&mut self) -> Result { + while self.bump().is_ascii_alphanumeric() {} + self.err(UnknownKeyword) + } + + fn scan_keyword(&mut self) -> Result { + self.buffer.clear(); + self.buffer.push(self.peek); + while self.bump().is_ascii_alphanumeric() { + if self.buffer.len() < 5 { + self.buffer.push(self.peek); + } else { + return self.unknown_keyword(); + 
} + } + match self.buffer.as_slice() { + b"c" => self.scan_comment(), + b"p" => self.tok(Ident(Problem)), + b"cnf" => self.tok(Ident(Cnf)), + b"sat" => self.tok(Ident(Sat)), + b"sate" => self.tok(Ident(Sate)), + b"satx" => self.tok(Ident(Satx)), + b"satex" => self.tok(Ident(Satex)), + b"xor" => self.tok(Ident(Xor)), + _ => self.err(UnknownKeyword), + } + } + + fn scan_nat(&mut self) -> Result { + let mut val = if self.peek.is_ascii_digit() { + (self.peek - b'0') as u64 + } else { + panic!("expected a digit to base 10: (0...9)") + }; + loop { + let peeked = self.bump(); + if !peeked.is_ascii_digit() { + break; + } + val *= 10; + val += (peeked - b'0') as u64; + } + self.tok(Nat(val)) + } + + fn skip_whitespace(&mut self) { + while self.peek.is_ascii_whitespace() { + self.bump(); + } + } + + fn update_nloc(&mut self) { + self.nloc = self.cloc; + } + + fn next_token(&mut self) -> Option> { + self.skip_whitespace(); + if self.peek == b'\0' { + return None; + } + self.update_nloc(); + Some(match self.peek { + b'A'..=b'Z' | b'a'..=b'z' => self.scan_keyword(), + + b'1'..=b'9' => self.scan_nat(), + + b'0' => self.bump_tok(Zero), + b'(' => self.bump_tok(Open), + b')' => self.bump_tok(Close), + b'+' => self.bump_tok(Plus), + b'*' => self.bump_tok(Star), + b'=' => self.bump_tok(Eq), + b'-' => self.bump_tok(Minus), + + _ => { + self.bump(); + self.err(InvalidTokenStart) + } + }) + } } impl Iterator for Lexer - where I: Iterator +where + I: Iterator, { - type Item = Result; + type Item = Result; - fn next(&mut self) -> Option { - self.next_token() - } + fn next(&mut self) -> Option { + self.next_token() + } } #[derive(Debug, Clone)] pub struct ValidLexer - where I: Iterator +where + I: Iterator, { - input: Lexer + input: Lexer, } impl ValidLexer - where I: Iterator +where + I: Iterator, { - pub fn from(input: I) -> ValidLexer { - ValidLexer{ input: Lexer::from(input) } - } + pub fn from(input: I) -> ValidLexer { + ValidLexer { + input: Lexer::from(input), + } + } } impl 
Iterator for ValidLexer - where I: Iterator +where + I: Iterator, { - type Item = Result; - - fn next(&mut self) -> Option { - match self.input.next() { - None => None, - Some(res_tok) => match res_tok { - Err(err) => Some(Err(err)), - Ok(tok) => if tok.kind.is_relevant() { - Some(Ok(tok)) - } - else { - self.next() - } - } - } - } + type Item = Result; + + fn next(&mut self) -> Option { + match self.input.next() { + None => None, + Some(res_tok) => match res_tok { + Err(err) => Some(Err(err)), + Ok(tok) => { + if tok.kind.is_relevant() { + Some(Ok(tok)) + } else { + self.next() + } + } + }, + } + } } #[cfg(test)] mod tests { - use super::*; + use super::*; - #[test] - fn simple_cnf() { - let sample = r" + #[test] + fn simple_cnf() { + let sample = r" c Sample DIMACS .cnf file c holding some information c and trying to be some @@ -316,169 +328,217 @@ mod tests { -3 4 0 5 -6 7 0 -7 -8 -9 0"; - let mut lexer = Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Comment)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Ident(Problem))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 6), Ident(Cnf))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 10), Nat(42))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 13), Nat(1337))))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 4), Nat(1))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 6), Nat(2))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 8), Zero)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 4), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 5), Nat(3))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 
7), Nat(4))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 9), Zero)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 4), Nat(5))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 6), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 7), Nat(6))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 9), Nat(7))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 11), Zero)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 4), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 5), Nat(7))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 7), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 8), Nat(8))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 10), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 11), Nat(9))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 13), Zero)))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn simple_sat() { - let sample = r" + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Comment)))); + + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(6, 4), Ident(Problem)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(6, 6), Ident(Cnf)))) + ); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 10), Nat(42))))); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(6, 13), Nat(1337)))) + ); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 4), Nat(1))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 6), Nat(2))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 8), Zero)))); + + 
assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 4), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 5), Nat(3))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 7), Nat(4))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(8, 9), Zero)))); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 4), Nat(5))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 6), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 7), Nat(6))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 9), Nat(7))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(9, 11), Zero)))); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 4), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 5), Nat(7))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 7), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 8), Nat(8))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 10), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 11), Nat(9))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(10, 13), Zero)))); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn simple_sat() { + let sample = r" c Sample DIMACS .sat file p sat 42 1337 (*(+(1 3 -4) +(4) +(2 3)))"; - let mut lexer = Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 4), Ident(Problem))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 6), Ident(Sat))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 10), Nat(42))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 13), Nat(1337))))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 5), Star)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 6), 
Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 7), Plus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 8), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 9), Nat(1))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 11), Nat(3))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 13), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 14), Nat(4))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 15), Close)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Plus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 5), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 6), Nat(4))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 7), Close)))); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Plus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 5), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 6), Nat(2))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 8), Nat(3))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 9), Close)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 10), Close)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 11), Close)))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn tricky_1() { - let sample = r"(1-2)"; - let mut lexer = Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 1), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 2), Nat(1))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 3), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 4), Nat(2))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 5), Close)))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn all_idents() { - let sample = r"p cnf sat satx sate satex xor"; - let mut lexer = 
Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 1), Ident(Problem))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 3), Ident(Cnf))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 7), Ident(Sat))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 11), Ident(Satx))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 16), Ident(Sate))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 21), Ident(Satex))))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 27), Ident(Xor))))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn all_ops() { - let sample = r"()+-*="; - let mut lexer = Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 1), Open)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 2), Close)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 3), Plus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 4), Minus)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 5), Star)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 6), Eq)))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn invalid_token_start() { - let sample = r"# foo Big"; - let mut lexer = Lexer::from(sample.chars()); - - assert_eq!(lexer.next(), Some(Err(ParseError::new(Loc::new(1, 1), InvalidTokenStart)))); - assert_eq!(lexer.next(), Some(Err(ParseError::new(Loc::new(1, 3), UnknownKeyword)))); - assert_eq!(lexer.next(), Some(Err(ParseError::new(Loc::new(1, 7), UnknownKeyword)))); - - assert_eq!(lexer.next(), None); - } - - #[test] - fn only_comments() { - let sample = r" + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); + + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(3, 4), Ident(Problem)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(3, 6), Ident(Sat)))) + ); + 
assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 10), Nat(42))))); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(3, 13), Nat(1337)))) + ); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 5), Star)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 6), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 7), Plus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 8), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 9), Nat(1))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 11), Nat(3))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 13), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 14), Nat(4))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 15), Close)))); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Plus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 5), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 6), Nat(4))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 7), Close)))); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Plus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 5), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 6), Nat(2))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 8), Nat(3))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 9), Close)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 10), Close)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 11), Close)))); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn tricky_1() { + let sample = r"(1-2)"; + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 1), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 2), Nat(1))))); + 
assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 3), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 4), Nat(2))))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 5), Close)))); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn all_idents() { + let sample = r"p cnf sat satx sate satex xor"; + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 1), Ident(Problem)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 3), Ident(Cnf)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 7), Ident(Sat)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 11), Ident(Satx)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 16), Ident(Sate)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 21), Ident(Satex)))) + ); + assert_eq!( + lexer.next(), + Some(Ok(Token::new(Loc::new(1, 27), Ident(Xor)))) + ); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn all_ops() { + let sample = r"()+-*="; + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 1), Open)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 2), Close)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 3), Plus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 4), Minus)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 5), Star)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(1, 6), Eq)))); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn invalid_token_start() { + let sample = r"# foo Big"; + let mut lexer = Lexer::from(sample.bytes()); + + assert_eq!( + lexer.next(), + Some(Err(ParseError::new(Loc::new(1, 1), InvalidTokenStart))) + ); + assert_eq!( + lexer.next(), + Some(Err(ParseError::new(Loc::new(1, 3), UnknownKeyword))) + ); + assert_eq!( + lexer.next(), + 
Some(Err(ParseError::new(Loc::new(1, 7), UnknownKeyword))) + ); + + assert_eq!(lexer.next(), None); + } + + #[test] + fn only_comments() { + let sample = r" c This is a comment. c Just like this. c That has to be filtered. c But not the following ... c Filter this, too! c And this!"; - let mut lexer = Lexer::from(sample.chars()); + let mut lexer = Lexer::from(sample.bytes()); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Comment)))); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(2, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(3, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(4, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(5, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Comment)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(7, 4), Comment)))); - assert_eq!(lexer.next(), None); - } + assert_eq!(lexer.next(), None); + } - #[test] - fn filter_valid() { - let sample = r" + #[test] + fn filter_valid() { + let sample = r" c This is a comment. c Just like this. c That has to be filtered. @@ -488,11 +548,14 @@ mod tests { INVALID c And this! 
"; - let mut lexer = ValidLexer::from(sample.chars()); + let mut lexer = ValidLexer::from(sample.bytes()); - assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Nat(42))))); - assert_eq!(lexer.next(), Some(Err(ParseError::new(Loc::new(8, 4), UnknownKeyword)))); + assert_eq!(lexer.next(), Some(Ok(Token::new(Loc::new(6, 4), Nat(42))))); + assert_eq!( + lexer.next(), + Some(Err(ParseError::new(Loc::new(8, 4), UnknownKeyword))) + ); - assert_eq!(lexer.next(), None); - } + assert_eq!(lexer.next(), None); + } } diff --git a/src/lib.rs b/src/lib.rs index df4b8d1..2cf9f7a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,17 @@ -//! The parser facility for parsing `.cnf` and `.sat` files as specified in the +//! The parser facility for parsing `.cnf` and `.sat` files as specified in the //! [DIMACS format specification](http://www.domagoj-babic.com/uploads/ResearchProjects/Spear/dimacs-cnf.pdf). -//! +//! //! The DIMACS format was specified for the DIMACS SAT solver competitions as input file format. //! Many other DIMACS file formats exist for other competitions, however, this crate currently only //! supports the formats that are relevant for SAT solvers. -//! +//! //! In `.cnf` the entire SAT formula is encoded as a conjunction of disjunctions and so mainly stores //! a list of clauses consisting of literals. -//! +//! //! The `.sat` format is slightly more difficult as the formula can be of a different shape and thus //! a `.sat` file internally looks similar to a Lisp file. 
#![cfg_attr(all(feature = "bench", test), feature(test))] - #![deny(missing_docs)] #[cfg(all(feature = "bench", test))] @@ -21,28 +20,13 @@ extern crate test; #[macro_use] extern crate bitflags; -mod items; mod errors; +mod items; mod lexer; mod parser; +pub use crate::errors::{ErrorKind, Loc, ParseError, Result}; pub use crate::items::{ - Clause, - Extensions, - Lit, - Var, - - Formula, - Instance, - Sign, - - FormulaBox, - FormulaList -}; -pub use crate::errors::{ - Loc, - ParseError, - ErrorKind, - Result + Clause, Extensions, Formula, FormulaBox, FormulaList, Instance, Lit, Sign, Var, }; -pub use crate::parser::parse_dimacs; +pub use crate::parser::{parse_dimacs, read_dimacs}; diff --git a/src/parser.rs b/src/parser.rs index 6a7fa12..d043d50 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -11,238 +11,261 @@ //! The `.sat` format is slightly more difficult as the formula can be of a different shape and thus //! a `.sat` file internally looks similar to a Lisp file. -use crate::lexer::*; +use std::io::Read; + use crate::errors::*; use crate::items::*; +use crate::lexer::*; #[derive(Debug, Clone)] struct Parser - where I: Iterator +where + I: Iterator, { - tokens: ValidLexer, - peek : Result + tokens: ValidLexer, + peek: Result, } impl Parser - where I: Iterator +where + I: Iterator, { - fn from(input: I) -> Parser { - Parser{ - tokens: ValidLexer::from(input), - peek : Err(ParseError::new(Loc::new(0, 0), ErrorKind::EmptyTokenStream)) - } - } - - fn mk_err(&self, kind: ErrorKind) -> ParseError { - ParseError::new(self.peek_loc(), kind) - } - - fn err(&self, kind: ErrorKind) -> Result { - Err(self.mk_err(kind)) - } - - fn peek_loc(&self) -> Loc { - match self.peek { - Ok(tok) => tok.loc, - Err(err) => err.loc - } - } - - fn consume(&mut self) -> Result { - self.peek = self.tokens - .next() - .unwrap_or(Ok(Token::new(self.peek_loc(), TokenKind::EndOfFile))); - self.peek - } - - fn expect(&mut self, expected: TokenKind) -> Result { - use self::TokenKind::EndOfFile; 
- use self::ErrorKind::{UnexpectedEndOfFile, UnexpectedToken}; - match self.peek?.kind { - k if k == expected => self.consume(), - EndOfFile => self.err(UnexpectedEndOfFile), - _ => self.err(UnexpectedToken) - } - } - - fn is_at_eof(&self) -> bool { - match self.peek { - Ok(peek) => peek.kind == TokenKind::EndOfFile, - _ => false - } - } - - fn expect_nat(&mut self) -> Result { - match self.peek?.kind { - TokenKind::Nat(val) => { - self.consume()?; - Ok(val) - }, - _ => self.err(ErrorKind::ExpectedNat) - } - } - - fn parse_header(&mut self) -> Result { - use self::TokenKind::{Ident}; - use self::Ident::*; - self.expect(Ident(Problem))?; - match self.peek?.kind { - Ident(Cnf) => self.parse_cnf_header(), - Ident(Sat) | - Ident(Sate) | - Ident(Satx) | - Ident(Satex) => self.parse_sat_header(), - _ => self.err(ErrorKind::UnexpectedToken) - } - } - - fn parse_cnf_header(&mut self) -> Result { - self.expect(TokenKind::Ident(Ident::Cnf))?; - let num_vars = self.expect_nat()?; - let num_clauses = self.expect_nat()?; - Ok(Instance::cnf(num_vars, self.parse_clauses(num_clauses)?)) - } - - fn parse_lit(&mut self) -> Result { - match self.peek?.kind { - TokenKind::Minus => { - self.consume()?; - Ok(Lit::from_i64(-(self.expect_nat()? 
as i64))) - } - TokenKind::Nat(val) => { - self.consume()?; - Ok(Lit::from_i64(val as i64)) - }, - _ => self.err(ErrorKind::ExpectedLit) - } - } - - fn parse_clause(&mut self) -> Result { - use self::TokenKind::{Minus, Nat, Zero, EndOfFile}; - use self::ErrorKind::{UnexpectedToken}; - let mut lits = Vec::new(); - loop { - match self.peek?.kind { - Minus | Nat(_) => lits.push(self.parse_lit()?), - Zero | EndOfFile => { self.consume()?; return Ok(Clause::from_vec(lits)) }, - _ => return self.err(UnexpectedToken) - } - } - } - - fn parse_clauses(&mut self, num_clauses: u64) -> Result> { - let mut clauses = Vec::with_capacity(num_clauses as usize); - while !self.is_at_eof() { - clauses.push(self.parse_clause()?); - } - Ok(clauses) - } - - fn parse_sat_extensions<'a>(&'a mut self) -> Result { - use self::TokenKind::{Ident}; - use self::Ident::{Sat, Sate, Satx, Satex}; - use self::ErrorKind::*; - match self.peek?.kind { - Ident(Sat) => { self.consume()?; Ok(Extensions::NONE) }, - Ident(Sate) => { self.consume()?; Ok(Extensions::EQ) }, - Ident(Satx) => { self.consume()?; Ok(Extensions::XOR) }, - Ident(Satex) => { self.consume()?; Ok(Extensions::EQ | Extensions::XOR) }, - _ => self.err(InvalidSatExtension) - } - } - - fn parse_sat_header(&mut self) -> Result { - let extensions = self.parse_sat_extensions()?; - let num_vars = self.expect_nat()?; - Ok(Instance::sat(num_vars, extensions, self.parse_paren_formula()?)) - } - - fn parse_formula(&mut self) -> Result { - use crate::lexer::TokenKind::*; - use crate::lexer::Ident::*; - let tok = self.peek?; - match tok.kind { - Nat(val) => { self.consume()?; Ok(Formula::lit(Lit::from_i64(val as i64))) }, - Open => self.parse_paren_formula(), - Plus => self.parse_or_formula(), - Star => self.parse_and_formula(), - Minus => self.parse_neg_formula(), - Eq => self.parse_eq_formula(), - Ident(Xor) => self.parse_xor_formula(), - _ => self.err(ErrorKind::UnexpectedToken) - } - } - - fn parse_formula_list(&mut self) -> Result> { - let mut 
formulas = Vec::new(); - while self.peek?.kind != TokenKind::Close { - formulas.push(self.parse_formula()?); - } - Ok(formulas) - } - - fn parse_formula_params(&mut self) -> Result> { - self.expect(TokenKind::Open)?; - let params = self.parse_formula_list()?; - self.expect(TokenKind::Close)?; - Ok(params) - } - - fn parse_paren_formula(&mut self) -> Result { - self.expect(TokenKind::Open)?; - let formula = Formula::paren(self.parse_formula()?); - self.expect(TokenKind::Close)?; - Ok(formula) - } - - fn parse_neg_formula(&mut self) -> Result { - self.expect(TokenKind::Minus)?; - let tok = self.peek?; - match tok.kind { - TokenKind::Open => { - self.expect(TokenKind::Open)?; - let formula = Formula::neg(self.parse_formula()?); - self.expect(TokenKind::Close)?; - Ok(formula) - }, - TokenKind::Nat(val) => { - self.consume()?; - Ok(Formula::lit(Lit::from_i64( -(val as i64) ))) - }, - _ => self.err(ErrorKind::UnexpectedToken) - } - } - - fn parse_or_formula(&mut self) -> Result { - self.expect(TokenKind::Plus)?; - Ok(Formula::or(self.parse_formula_params()?)) - } - - fn parse_and_formula(&mut self) -> Result { - self.expect(TokenKind::Star)?; - Ok(Formula::and(self.parse_formula_params()?)) - } - - fn parse_eq_formula(&mut self) -> Result { - self.expect(TokenKind::Eq)?; - Ok(Formula::eq(self.parse_formula_params()?)) - } - - fn parse_xor_formula(&mut self) -> Result { - self.expect(TokenKind::Ident(Ident::Xor))?; - Ok(Formula::xor(self.parse_formula_params()?)) - } - - fn parse_dimacs(&mut self) -> Result { - self.consume()?; - let instance = self.parse_header(); - if self.is_at_eof() { - instance - } - else { - self.err(ErrorKind::NotParsedToEnd) - } - } + fn from(input: I) -> Parser { + Parser { + tokens: ValidLexer::from(input), + peek: Err(ParseError::new(Loc::new(0, 0), ErrorKind::EmptyTokenStream)), + } + } + + fn mk_err(&self, kind: ErrorKind) -> ParseError { + ParseError::new(self.peek_loc(), kind) + } + + fn err(&self, kind: ErrorKind) -> Result { + 
Err(self.mk_err(kind)) + } + + fn peek_loc(&self) -> Loc { + match self.peek { + Ok(tok) => tok.loc, + Err(err) => err.loc, + } + } + + fn consume(&mut self) -> Result { + self.peek = self + .tokens + .next() + .unwrap_or(Ok(Token::new(self.peek_loc(), TokenKind::EndOfFile))); + self.peek + } + + fn expect(&mut self, expected: TokenKind) -> Result { + use self::ErrorKind::{UnexpectedEndOfFile, UnexpectedToken}; + use self::TokenKind::EndOfFile; + match self.peek?.kind { + k if k == expected => self.consume(), + EndOfFile => self.err(UnexpectedEndOfFile), + _ => self.err(UnexpectedToken), + } + } + + fn is_at_eof(&self) -> bool { + match self.peek { + Ok(peek) => peek.kind == TokenKind::EndOfFile, + _ => false, + } + } + + fn expect_nat(&mut self) -> Result { + match self.peek?.kind { + TokenKind::Nat(val) => { + self.consume()?; + Ok(val) + } + _ => self.err(ErrorKind::ExpectedNat), + } + } + + fn parse_header(&mut self) -> Result { + use self::Ident::*; + use self::TokenKind::Ident; + self.expect(Ident(Problem))?; + match self.peek?.kind { + Ident(Cnf) => self.parse_cnf_header(), + Ident(Sat) | Ident(Sate) | Ident(Satx) | Ident(Satex) => self.parse_sat_header(), + _ => self.err(ErrorKind::UnexpectedToken), + } + } + + fn parse_cnf_header(&mut self) -> Result { + self.expect(TokenKind::Ident(Ident::Cnf))?; + let num_vars = self.expect_nat()?; + let num_clauses = self.expect_nat()?; + Ok(Instance::cnf(num_vars, self.parse_clauses(num_clauses)?)) + } + + fn parse_lit(&mut self) -> Result { + match self.peek?.kind { + TokenKind::Minus => { + self.consume()?; + Ok(Lit::from_i64(-(self.expect_nat()? 
as i64))) + } + TokenKind::Nat(val) => { + self.consume()?; + Ok(Lit::from_i64(val as i64)) + } + _ => self.err(ErrorKind::ExpectedLit), + } + } + + fn parse_clause(&mut self) -> Result { + use self::ErrorKind::UnexpectedToken; + use self::TokenKind::{EndOfFile, Minus, Nat, Zero}; + let mut lits = Vec::new(); + loop { + match self.peek?.kind { + Minus | Nat(_) => lits.push(self.parse_lit()?), + Zero | EndOfFile => { + self.consume()?; + return Ok(Clause::from_vec(lits)); + } + _ => return self.err(UnexpectedToken), + } + } + } + + fn parse_clauses(&mut self, num_clauses: u64) -> Result> { + let mut clauses = Vec::with_capacity(num_clauses as usize); + while !self.is_at_eof() { + clauses.push(self.parse_clause()?); + } + Ok(clauses) + } + + fn parse_sat_extensions<'a>(&'a mut self) -> Result { + use self::ErrorKind::*; + use self::Ident::{Sat, Sate, Satex, Satx}; + use self::TokenKind::Ident; + match self.peek?.kind { + Ident(Sat) => { + self.consume()?; + Ok(Extensions::NONE) + } + Ident(Sate) => { + self.consume()?; + Ok(Extensions::EQ) + } + Ident(Satx) => { + self.consume()?; + Ok(Extensions::XOR) + } + Ident(Satex) => { + self.consume()?; + Ok(Extensions::EQ | Extensions::XOR) + } + _ => self.err(InvalidSatExtension), + } + } + + fn parse_sat_header(&mut self) -> Result { + let extensions = self.parse_sat_extensions()?; + let num_vars = self.expect_nat()?; + Ok(Instance::sat( + num_vars, + extensions, + self.parse_paren_formula()?, + )) + } + + fn parse_formula(&mut self) -> Result { + use crate::lexer::Ident::*; + use crate::lexer::TokenKind::*; + let tok = self.peek?; + match tok.kind { + Nat(val) => { + self.consume()?; + Ok(Formula::lit(Lit::from_i64(val as i64))) + } + Open => self.parse_paren_formula(), + Plus => self.parse_or_formula(), + Star => self.parse_and_formula(), + Minus => self.parse_neg_formula(), + Eq => self.parse_eq_formula(), + Ident(Xor) => self.parse_xor_formula(), + _ => self.err(ErrorKind::UnexpectedToken), + } + } + + fn 
parse_formula_list(&mut self) -> Result> { + let mut formulas = Vec::new(); + while self.peek?.kind != TokenKind::Close { + formulas.push(self.parse_formula()?); + } + Ok(formulas) + } + + fn parse_formula_params(&mut self) -> Result> { + self.expect(TokenKind::Open)?; + let params = self.parse_formula_list()?; + self.expect(TokenKind::Close)?; + Ok(params) + } + + fn parse_paren_formula(&mut self) -> Result { + self.expect(TokenKind::Open)?; + let formula = Formula::paren(self.parse_formula()?); + self.expect(TokenKind::Close)?; + Ok(formula) + } + + fn parse_neg_formula(&mut self) -> Result { + self.expect(TokenKind::Minus)?; + let tok = self.peek?; + match tok.kind { + TokenKind::Open => { + self.expect(TokenKind::Open)?; + let formula = Formula::neg(self.parse_formula()?); + self.expect(TokenKind::Close)?; + Ok(formula) + } + TokenKind::Nat(val) => { + self.consume()?; + Ok(Formula::lit(Lit::from_i64(-(val as i64)))) + } + _ => self.err(ErrorKind::UnexpectedToken), + } + } + + fn parse_or_formula(&mut self) -> Result { + self.expect(TokenKind::Plus)?; + Ok(Formula::or(self.parse_formula_params()?)) + } + + fn parse_and_formula(&mut self) -> Result { + self.expect(TokenKind::Star)?; + Ok(Formula::and(self.parse_formula_params()?)) + } + + fn parse_eq_formula(&mut self) -> Result { + self.expect(TokenKind::Eq)?; + Ok(Formula::eq(self.parse_formula_params()?)) + } + + fn parse_xor_formula(&mut self) -> Result { + self.expect(TokenKind::Ident(Ident::Xor))?; + Ok(Formula::xor(self.parse_formula_params()?)) + } + + fn parse_dimacs(&mut self) -> Result { + self.consume()?; + let instance = self.parse_header(); + if self.is_at_eof() { + instance + } else { + self.err(ErrorKind::NotParsedToEnd) + } + } } /// Parses a the given string as `.cnf` or `.sat` file as specified in @@ -250,16 +273,104 @@ impl Parser /// /// Returns an appropriate SAT instance if no errors occured while parsing. 
pub fn parse_dimacs(input: &str) -> Result { - Parser::from(input.chars()).parse_dimacs() + Parser::from(input.bytes()).parse_dimacs() +} + +/// Parses a the given byte source as `.cnf` or `.sat` file as specified in +/// [DIMACS format specification](http://www.domagoj-babic.com/uploads/ResearchProjects/Spear/dimacs-cnf.pdf). +/// +/// Returns an appropriate SAT instance if no errors occured while parsing. +pub fn read_dimacs(input: R) -> Result { + Parser::from(input.bytes().map(|b| b.expect("IO Error"))).parse_dimacs() } #[cfg(test)] mod tests { - use super::*; + use std::io::Read; - #[test] - fn simple_cnf_1() { - let sample = r" + use super::*; + + #[test] + fn simple_cnf_1() { + let sample = r" + c Sample DIMACS .cnf file + c holding some information + c and trying to be some + c kind of a test. + p cnf 42 1337 + 1 2 0 + -3 4 0 + 5 -6 7 0 + -7 -8 -9 0"; + let parsed = parse_dimacs(sample).expect("valid .cnf"); + let expected = Instance::cnf( + 42, + vec![ + Clause::from_vec(vec![Lit::from_i64(1), Lit::from_i64(2)]), + Clause::from_vec(vec![Lit::from_i64(-3), Lit::from_i64(4)]), + Clause::from_vec(vec![Lit::from_i64(5), Lit::from_i64(-6), Lit::from_i64(7)]), + Clause::from_vec(vec![ + Lit::from_i64(-7), + Lit::from_i64(-8), + Lit::from_i64(-9), + ]), + ], + ); + assert_eq!(parsed, expected); + } + + #[test] + fn simple_cnf_2() { + let sample = r" + c Example CNF format file + c + p cnf 4 3 + 1 3 -4 0 + 4 0 2 + -3"; + let parsed = parse_dimacs(sample).expect("valid .cnf"); + let expected = Instance::cnf( + 4, + vec![ + Clause::from_vec(vec![Lit::from_i64(1), Lit::from_i64(3), Lit::from_i64(-4)]), + Clause::from_vec(vec![Lit::from_i64(4)]), + Clause::from_vec(vec![Lit::from_i64(2), Lit::from_i64(-3)]), + ], + ); + assert_eq!(parsed, expected); + } + + #[test] + fn simple_sat() { + let sample = r" + c Sample DIMACS .sat file + p sat 42 + (*(+(1 3 -4) + +(4) + +(2 3)))"; + let parsed = parse_dimacs(sample).expect("valid .sat"); + let expected = Instance::sat( + 
42, + Extensions::NONE, + Formula::paren(Formula::and(vec![ + Formula::or(vec![ + Formula::lit(Lit::from_i64(1)), + Formula::lit(Lit::from_i64(3)), + Formula::lit(Lit::from_i64(-4)), + ]), + Formula::or(vec![Formula::lit(Lit::from_i64(4))]), + Formula::or(vec![ + Formula::lit(Lit::from_i64(2)), + Formula::lit(Lit::from_i64(3)), + ]), + ])), + ); + assert_eq!(parsed, expected); + } + + #[test] + fn simple_cnf_read_1() { + let sample = r" c Sample DIMACS .cnf file c holding some information c and trying to be some @@ -269,58 +380,69 @@ mod tests { -3 4 0 5 -6 7 0 -7 -8 -9 0"; - let parsed = parse_dimacs(sample).expect("valid .cnf"); - let expected = Instance::cnf(42, vec![ - Clause::from_vec(vec![Lit::from_i64( 1), Lit::from_i64( 2)]), - Clause::from_vec(vec![Lit::from_i64(-3), Lit::from_i64( 4)]), - Clause::from_vec(vec![Lit::from_i64( 5), Lit::from_i64(-6), Lit::from_i64( 7)]), - Clause::from_vec(vec![Lit::from_i64(-7), Lit::from_i64(-8), Lit::from_i64(-9)]) - ]); - assert_eq!(parsed, expected); - } - - #[test] - fn simple_cnf_2() { - let sample = r" + let parsed = read_dimacs(sample.as_bytes()).expect("valid .cnf"); + let expected = Instance::cnf( + 42, + vec![ + Clause::from_vec(vec![Lit::from_i64(1), Lit::from_i64(2)]), + Clause::from_vec(vec![Lit::from_i64(-3), Lit::from_i64(4)]), + Clause::from_vec(vec![Lit::from_i64(5), Lit::from_i64(-6), Lit::from_i64(7)]), + Clause::from_vec(vec![ + Lit::from_i64(-7), + Lit::from_i64(-8), + Lit::from_i64(-9), + ]), + ], + ); + assert_eq!(parsed, expected); + } + + #[test] + fn simple_cnf_read_2() { + let sample = r" c Example CNF format file c p cnf 4 3 1 3 -4 0 4 0 2 -3"; - let parsed = parse_dimacs(sample).expect("valid .cnf"); - let expected = Instance::cnf(4, vec![ - Clause::from_vec(vec![Lit::from_i64( 1), Lit::from_i64( 3), Lit::from_i64(-4)]), - Clause::from_vec(vec![Lit::from_i64( 4)]), - Clause::from_vec(vec![Lit::from_i64( 2), Lit::from_i64(-3)]) - ]); - assert_eq!(parsed, expected); - } - - #[test] - fn 
simple_sat() { - let sample = r" + let parsed = read_dimacs(sample.as_bytes()).expect("valid .cnf"); + let expected = Instance::cnf( + 4, + vec![ + Clause::from_vec(vec![Lit::from_i64(1), Lit::from_i64(3), Lit::from_i64(-4)]), + Clause::from_vec(vec![Lit::from_i64(4)]), + Clause::from_vec(vec![Lit::from_i64(2), Lit::from_i64(-3)]), + ], + ); + assert_eq!(parsed, expected); + } + + #[test] + fn simple_sat_read() { + let sample = r" c Sample DIMACS .sat file p sat 42 (*(+(1 3 -4) +(4) +(2 3)))"; - let parsed = parse_dimacs(sample).expect("valid .sat"); - let expected = Instance::sat(42, Extensions::NONE, - Formula::paren( - Formula::and(vec![ - Formula::or(vec![ - Formula::lit(Lit::from_i64(1)), Formula::lit(Lit::from_i64(3)), Formula::lit(Lit::from_i64(-4)) - ]), - Formula::or(vec![ - Formula::lit(Lit::from_i64(4)) - ]), - Formula::or(vec![ - Formula::lit(Lit::from_i64(2)), Formula::lit(Lit::from_i64(3)) - ]) - ]) - ) - ); - assert_eq!(parsed, expected); - } + let parsed = read_dimacs(sample.as_bytes()).expect("valid .sat"); + let expected = Instance::sat( + 42, + Extensions::NONE, + Formula::paren(Formula::and(vec![ + Formula::or(vec![ + Formula::lit(Lit::from_i64(1)), + Formula::lit(Lit::from_i64(3)), + Formula::lit(Lit::from_i64(-4)), + ]), + Formula::or(vec![Formula::lit(Lit::from_i64(4))]), + Formula::or(vec![ + Formula::lit(Lit::from_i64(2)), + Formula::lit(Lit::from_i64(3)), + ]), + ])), + ); + assert_eq!(parsed, expected); + } }