Review notes (text before the first "diff --git" header is ignored by git apply):
- Line structure and generic type parameters were restored from a collapsed copy.
- "index" lines are omitted for files whose content was revised, because the
  recorded post-image hashes would no longer match.
- .gitignore: the hunk header counts one more context line than survived; it is
  assumed to be a blank line after "/.idea" -- TODO confirm against the repository.

diff --git a/.gitignore b/.gitignore
index be2152e..b9012db 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /target
 /.idea
 
-Cargo.lock
\ No newline at end of file
+Cargo.lock
+test.txt
\ No newline at end of file
diff --git a/Cargo.toml b/Cargo.toml
index 52467fb..8d01d82 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,3 +6,4 @@ edition = "2021"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+num-bigint = "0.4.3"
diff --git a/src/main.rs b/src/main.rs
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,3 +1,22 @@
+extern crate core;
+
+mod parsing;
+
+/// Lexes and parses the file named by the first command-line argument, printing
+/// the tokens and then the parse result in `Debug` form.
 fn main() {
-    println!("Hello, world!");
+    let args: Vec<String> = std::env::args().collect();
+    // A missing argument is a usage error; indexing `args[1]` would panic instead.
+    let path = match args.get(1) {
+        Some(path) => path,
+        None => {
+            eprintln!("Expected the path of a source file as the first argument");
+            std::process::exit(1);
+        }
+    };
+    let file = std::fs::read_to_string(path).expect("File not found");
+    let tokens = parsing::lex(&file);
+    println!("{:?}", tokens);
+    let parsed = parsing::parse(&tokens);
+    println!("{:?}", parsed);
 }
diff --git a/src/parsing/lexer.rs b/src/parsing/lexer.rs
new file mode 100644
--- /dev/null
+++ b/src/parsing/lexer.rs
@@ -0,0 +1,93 @@
+use num_bigint::BigInt;
+use super::Operator;
+
+/// A lexical token produced by [`lex`].
+#[derive(Debug)]
+pub enum Token {
+    Operator(Operator),
+    Identifier(String),
+    Number(BigInt),
+    String(String),
+    OpenBrace,
+    CloseBrace,
+    OpenParen,
+    CloseParen
+}
+
+/// Splits `str` into a flat list of tokens.
+///
+/// Recognises parentheses, braces, double-quoted strings, runs of ASCII digits
+/// (numbers), identifiers (an ASCII letter followed by ASCII alphanumerics) and
+/// every character accepted by `Operator::from_char`. Any other character,
+/// including whitespace, is skipped without producing a token.
+///
+/// Inside a string literal `\"` yields a quote and `\\` yields a backslash; a
+/// backslash before any other character is kept as written. A string that is
+/// still open at end of input ends there.
+pub fn lex(str: &str) -> Vec<Token> {
+    let iter: Vec<char> = str.chars().collect();
+    let mut tokens = Vec::new();
+    let mut i = 0;
+    while let Some(next_char) = iter.get(i) {
+        i += 1;
+        match next_char {
+            '(' => tokens.push(Token::OpenParen),
+            ')' => tokens.push(Token::CloseParen),
+            '{' => tokens.push(Token::OpenBrace),
+            '}' => tokens.push(Token::CloseBrace),
+            '"' => {
+                let mut string = String::new();
+                while let Some(next_char) = iter.get(i) {
+                    i += 1;
+                    match next_char {
+                        '"' => break,
+                        '\\' => match iter.get(i) {
+                            // Consume the escaped character as well, so an escaped
+                            // backslash cannot hide the closing quote (`"a\\"`).
+                            Some(&escaped) if escaped == '"' || escaped == '\\' => {
+                                string.push(escaped);
+                                i += 1;
+                            }
+                            _ => string.push('\\'),
+                        },
+                        _ => string.push(*next_char),
+                    }
+                }
+                tokens.push(Token::String(string));
+            }
+            ' ' => continue,
+            _ => {
+                if next_char.is_ascii_digit() {
+                    let mut number = String::new();
+                    number.push(*next_char);
+                    while let Some(next_char) = iter.get(i) {
+                        if next_char.is_ascii_digit() {
+                            number.push(*next_char);
+                            i += 1;
+                        } else {
+                            break;
+                        }
+                    }
+                    let value = BigInt::parse_bytes(number.as_bytes(), 10)
+                        .expect("a run of ASCII digits is a valid base-10 integer");
+                    tokens.push(Token::Number(value));
+                } else if next_char.is_ascii_alphabetic() {
+                    let mut identifier = String::new();
+                    identifier.push(*next_char);
+                    while let Some(next_char) = iter.get(i) {
+                        if next_char.is_ascii_alphanumeric() {
+                            identifier.push(*next_char);
+                            i += 1;
+                        } else {
+                            break;
+                        }
+                    }
+                    tokens.push(Token::Identifier(identifier));
+                } else if let Some(operator) = Operator::from_char(next_char) {
+                    tokens.push(Token::Operator(operator));
+                }
+            }
+        }
+    }
+    tokens
+}
\ No newline at end of file
diff --git a/src/parsing/mod.rs b/src/parsing/mod.rs
new file mode 100644
index 0000000..1df2fe3
--- /dev/null
+++ b/src/parsing/mod.rs
@@ -0,0 +1,7 @@
+mod lexer;
+mod operator;
+mod parser;
+
+pub use lexer::lex;
+pub use parser::parse;
+pub use operator::Operator;
\ No newline at end of file
diff --git a/src/parsing/operator.rs b/src/parsing/operator.rs
new file mode 100644
--- /dev/null
+++ b/src/parsing/operator.rs
@@ -0,0 +1,102 @@
+use std::fmt::Debug;
+
+/// An operator of the expression language: arithmetic, comparison/logic, or set.
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum Operator {
+    Plus,
+    Minus,
+    Multiply,
+    Divide,
+    Modulo,
+    Negate,
+
+    Equal,
+    NotEqual,
+    LessThan,
+    GreaterThan,
+    LessThanEqual,
+    GreaterThanEqual,
+    Or,
+    And,
+    Not,
+    Implies,
+
+    Contains,
+    NotContains,
+    Subset,
+    StrictSubset,
+    Superset,
+    StrictSuperset,
+    Union,
+    Intersection,
+    Difference,
+    SymmetricDifference,
+    Size
+}
+
+impl Operator {
+    /// Maps a single character to its operator, or `None` if it is not one.
+    pub fn from_char(c: &char) -> Option<Operator> {
+        match c {
+            '+' => Some(Operator::Plus),
+            '-' => Some(Operator::Minus),
+            '*' => Some(Operator::Multiply),
+            '/' => Some(Operator::Divide),
+            '%' => Some(Operator::Modulo),
+            '¯' => Some(Operator::Negate),
+
+            '=' => Some(Operator::Equal),
+            '≠' => Some(Operator::NotEqual),
+            '<' => Some(Operator::LessThan),
+            '>' => Some(Operator::GreaterThan),
+            '≤' => Some(Operator::LessThanEqual),
+            '≥' => Some(Operator::GreaterThanEqual),
+            '∨' => Some(Operator::Or),
+            '∧' => Some(Operator::And),
+            '¬' => Some(Operator::Not),
+            '⇒' => Some(Operator::Implies),
+
+            '∈' => Some(Operator::Contains),
+            '∉' => Some(Operator::NotContains),
+            '⊆' => Some(Operator::Subset),
+            '⊂' => Some(Operator::StrictSubset),
+            '⊇' => Some(Operator::Superset),
+            '⊃' => Some(Operator::StrictSuperset),
+            '∪' => Some(Operator::Union),
+            '∩' => Some(Operator::Intersection),
+            '\\' => Some(Operator::Difference),
+            '∆' => Some(Operator::SymmetricDifference),
+            '#' => Some(Operator::Size),
+            _ => None
+        }
+    }
+
+    /// Number of operands: 1 for `Not`, `Negate` and `Size`, 2 for all others.
+    pub fn arity(&self) -> i32 {
+        match self {
+            Operator::Not | Operator::Negate | Operator::Size => 1,
+            _ => 2
+        }
+    }
+
+    /// Binding strength: the parser splits an expression at the operator with
+    /// the lowest value, so operators with higher values bind tighter.
+    pub fn precedence(&self) -> i32 {
+        match self {
+            Operator::Contains | Operator::NotContains | Operator::Subset | Operator::StrictSubset
+            | Operator::Superset | Operator::StrictSuperset | Operator::Union | Operator::Intersection
+            | Operator::Difference | Operator::SymmetricDifference => 1,
+
+            Operator::Or | Operator::And | Operator::Implies => 2,
+
+            Operator::Equal | Operator::NotEqual | Operator::LessThan | Operator::GreaterThan
+            | Operator::LessThanEqual | Operator::GreaterThanEqual => 3,
+
+            Operator::Plus | Operator::Minus => 4,
+
+            Operator::Multiply | Operator::Divide | Operator::Modulo => 5,
+
+            Operator::Not | Operator::Negate | Operator::Size => 7
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/parsing/parser.rs b/src/parsing/parser.rs
new file mode 100644
--- /dev/null
+++ b/src/parsing/parser.rs
@@ -0,0 +1,132 @@
+use num_bigint::BigInt;
+use super::lexer::Token;
+use super::Operator;
+
+/// Parses a complete token list into an AST.
+///
+/// Returns `None` when the tokens do not form an expression this parser
+/// understands.
+pub fn parse(tokens: &[Token]) -> Option<AstNode> {
+    parse_expression(tokens)
+}
+
+/// Parses `tokens` by splitting at the loosest-binding operator outside
+/// parentheses (see `lowest_op`) and recursing into the operand slices; a slice
+/// without such an operator is handed to `parse_atom`.
+fn parse_expression(tokens: &[Token]) -> Option<AstNode> {
+    let lowest_index = match lowest_op(tokens) {
+        Some(index) => index,
+        None => return parse_atom(tokens),
+    };
+    let op = match &tokens[lowest_index] {
+        Token::Operator(op) => *op,
+        _ => unreachable!("lowest_op only returns indices of operator tokens"),
+    };
+    // A missing or malformed operand is a parse failure, not a panic.
+    let right = Box::new(parse_expression(&tokens[lowest_index + 1..])?);
+    let left_tokens = &tokens[..lowest_index];
+    if op.arity() == 1 {
+        // A prefix operator has no left operand; refuse to silently drop tokens.
+        if left_tokens.is_empty() {
+            Some(AstNode::UnaryExpression(op, right))
+        } else {
+            None
+        }
+    } else {
+        let left = parse_expression(left_tokens)?;
+        Some(AstNode::BinaryExpression(Box::new(left), op, right))
+    }
+}
+
+/// Returns the index of the operator at which `tokens` should be split: the one
+/// with the lowest precedence outside parentheses, or `None` if there is none.
+///
+/// Among equal-precedence candidates the rightmost binary operator wins, which
+/// makes binary operators left-associative. Prefix (arity 1) operators keep the
+/// leftmost candidate instead, so a chain such as `¬¬a` nests rather than
+/// dropping its outer operator.
+fn lowest_op(tokens: &[Token]) -> Option<usize> {
+    let mut lowest = i32::MAX;
+    let mut lowest_index = None;
+    let mut depth = 0;
+    for (i, token) in tokens.iter().enumerate() {
+        match token {
+            Token::Operator(op) if depth == 0 => {
+                let precedence = op.precedence();
+                let replaces = if op.arity() == 1 {
+                    precedence < lowest
+                } else {
+                    precedence <= lowest
+                };
+                if replaces {
+                    lowest = precedence;
+                    lowest_index = Some(i);
+                }
+            }
+            Token::OpenParen => depth += 1,
+            Token::CloseParen => depth -= 1,
+            _ => {}
+        }
+    }
+    lowest_index
+}
+
+/// Parses the atom at the start of `tokens`: a number, an identifier, or a
+/// parenthesised expression. Tokens after the atom are not inspected.
+///
+/// Returns `None` for an empty slice, for any other leading token, and for an
+/// opening parenthesis that is never closed.
+fn parse_atom(tokens: &[Token]) -> Option<AstNode> {
+    match tokens.first()? {
+        Token::Number(n) => Some(AstNode::Number(n.clone())),
+        Token::Identifier(v) => Some(AstNode::Variable(v.clone())),
+        Token::OpenParen => {
+            let mut depth = 1;
+            let mut close = None;
+            // Find the parenthesis that closes the leading one; running off the
+            // end of the slice means the input is unbalanced.
+            for (i, token) in tokens.iter().enumerate().skip(1) {
+                match token {
+                    Token::OpenParen => depth += 1,
+                    Token::CloseParen => depth -= 1,
+                    _ => {}
+                }
+                if depth == 0 {
+                    close = Some(i);
+                    break;
+                }
+            }
+            parse_expression(&tokens[1..close?])
+        }
+        _ => None,
+    }
+}
+
+/// A node of the parsed expression tree.
+#[derive(Debug, Clone)]
+pub enum AstNode {
+    Number(BigInt),
+    Variable(String),
+    UnaryExpression(Operator, Box<AstNode>),
+    BinaryExpression(Box<AstNode>, Operator, Box<AstNode>),
+}
+
+impl AstNode {
+    /// Calls `f` on this node and then on every descendant (parents first, left
+    /// operand before right), returning a deep copy of the tree.
+    pub fn walk(&self, f: &dyn Fn(&AstNode)) -> AstNode {
+        f(self);
+        match self {
+            AstNode::Number(_) => self.clone(),
+            AstNode::Variable(_) => self.clone(),
+            AstNode::UnaryExpression(op, node) =>
+                AstNode::UnaryExpression(*op, Box::new(node.walk(f))),
+            AstNode::BinaryExpression(left, op, right) =>
+                AstNode::BinaryExpression(
+                    Box::new(left.walk(f)),
+                    *op,
+                    Box::new(right.walk(f)),
+                )
+        }
+    }
+}
\ No newline at end of file