Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
/target
/.idea

Cargo.lock
Cargo.lock
test.txt
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
num-bigint = "0.4.3"
11 changes: 10 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
extern crate core;

mod parsing;

/// Entry point: prints a greeting, then lexes and parses the file named by the
/// first command-line argument, printing the token list and the parse result
/// with their `Debug` formatting.
///
/// Exits with status 2 when no path argument is given and with status 1 when
/// the file cannot be read. Additional arguments are ignored.
fn main() {
    println!("Hello, world!");

    // `nth(1)` skips the program name; a missing argument is a usage error,
    // not an out-of-bounds panic.
    let path = match std::env::args().nth(1) {
        Some(path) => path,
        None => {
            eprintln!("usage: <program> <source-file>");
            std::process::exit(2);
        }
    };

    // Report the actual I/O error: a read can fail for reasons other than
    // the file being absent (permissions, invalid UTF-8, ...).
    let file = match std::fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(err) => {
            eprintln!("Failed to read {}: {}", path, err);
            std::process::exit(1);
        }
    };

    let tokens = parsing::lex(&file);
    println!("{:?}", tokens);
    let parsed = parsing::parse(&tokens);
    println!("{:?}", parsed);
}
73 changes: 73 additions & 0 deletions src/parsing/lexer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use num_bigint::BigInt;
use super::Operator;

/// A single lexical unit produced by `lex`.
///
/// `Clone` and `PartialEq` are derived so token streams can be copied and
/// compared directly (for example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// An operator character recognised by `Operator::from_char`.
    Operator(Operator),
    /// An ASCII letter followed by ASCII letters or digits.
    Identifier(String),
    /// A run of ASCII decimal digits, held as an arbitrary-precision integer.
    Number(BigInt),
    /// The text between a pair of double quotes.
    String(String),
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
}

/// Converts source text into a flat list of [`Token`]s.
///
/// Recognised lexemes:
/// * `(`, `)`, `{`, `}` become the corresponding grouping tokens.
/// * `"..."` becomes [`Token::String`]. A backslash escapes the character that
///   follows it, so `\"` does not end the literal and `\\` is a complete
///   escape that does not affect a following quote. Escape sequences are
///   stored verbatim (the backslash is kept). A literal with no closing quote
///   extends to the end of the input.
/// * A run of ASCII digits becomes [`Token::Number`].
/// * An ASCII letter followed by ASCII letters or digits becomes
///   [`Token::Identifier`].
/// * Any character accepted by `Operator::from_char` becomes
///   [`Token::Operator`].
///
/// Whitespace and any other unrecognised character are skipped.
pub fn lex(str: &str) -> Vec<Token> {
    // Collected up front so the scanner can look ahead by index.
    let chars: Vec<char> = str.chars().collect();
    let mut tokens = Vec::new();
    let mut i = 0;
    while let Some(&current) = chars.get(i) {
        // `i` always points at the character after `current` from here on.
        i += 1;
        match current {
            '(' => tokens.push(Token::OpenParen),
            ')' => tokens.push(Token::CloseParen),
            '{' => tokens.push(Token::OpenBrace),
            '}' => tokens.push(Token::CloseBrace),
            '"' => {
                let mut string = String::new();
                // Tracking the escape state (instead of peeking at the
                // previous character) keeps `\\"` from being misread as an
                // escaped quote.
                let mut escaped = false;
                while let Some(&c) = chars.get(i) {
                    i += 1;
                    if escaped {
                        escaped = false;
                    } else if c == '\\' {
                        escaped = true;
                    } else if c == '"' {
                        break;
                    }
                    string.push(c);
                }
                tokens.push(Token::String(string));
            }
            c if c.is_whitespace() => {}
            c if c.is_ascii_digit() => {
                let start = i - 1;
                while matches!(chars.get(i), Some(d) if d.is_ascii_digit()) {
                    i += 1;
                }
                let digits: String = chars[start..i].iter().collect();
                let value = BigInt::parse_bytes(digits.as_bytes(), 10)
                    .expect("a non-empty run of ASCII digits is a valid base-10 integer");
                tokens.push(Token::Number(value));
            }
            c if c.is_ascii_alphabetic() => {
                let start = i - 1;
                while matches!(chars.get(i), Some(d) if d.is_ascii_alphanumeric()) {
                    i += 1;
                }
                tokens.push(Token::Identifier(chars[start..i].iter().collect()));
            }
            other => {
                // Characters that are not operators are dropped silently.
                if let Some(operator) = Operator::from_char(&other) {
                    tokens.push(Token::Operator(operator));
                }
            }
        }
    }
    tokens
}
7 changes: 7 additions & 0 deletions src/parsing/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
mod lexer;
mod operator;
mod parser;

pub use lexer::lex;
pub use parser::parse;
pub use operator::Operator;
97 changes: 97 additions & 0 deletions src/parsing/operator.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
use std::fmt::Debug;

/// Every operator the lexer can emit: arithmetic, comparison/logical and set
/// operators.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Operator {
    // Arithmetic
    Plus,
    Minus,
    Multiply,
    Divide,
    Modulo,
    Negate,

    // Comparison and logic
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanEqual,
    GreaterThanEqual,
    Or,
    And,
    Not,
    Implies,

    // Sets
    Contains,
    NotContains,
    Subset,
    StrictSubset,
    Superset,
    StrictSuperset,
    Union,
    Intersection,
    Difference,
    SymmetricDifference,
    Size,
}

impl Operator {
    /// Maps a single source character to its operator, or `None` when the
    /// character is not an operator symbol.
    pub fn from_char(c: &char) -> Option<Operator> {
        let operator = match *c {
            '+' => Self::Plus,
            '-' => Self::Minus,
            '*' => Self::Multiply,
            '/' => Self::Divide,
            '%' => Self::Modulo,
            '¯' => Self::Negate,

            '=' => Self::Equal,
            '≠' => Self::NotEqual,
            '<' => Self::LessThan,
            '>' => Self::GreaterThan,
            '≤' => Self::LessThanEqual,
            '≥' => Self::GreaterThanEqual,
            '∨' => Self::Or,
            '∧' => Self::And,
            '¬' => Self::Not,
            '⇒' => Self::Implies,

            '∈' => Self::Contains,
            '∉' => Self::NotContains,
            '⊆' => Self::Subset,
            '⊂' => Self::StrictSubset,
            '⊇' => Self::Superset,
            '⊃' => Self::StrictSuperset,
            '∪' => Self::Union,
            '∩' => Self::Intersection,
            '\\' => Self::Difference,
            '∆' => Self::SymmetricDifference,
            '#' => Self::Size,

            _ => return None,
        };
        Some(operator)
    }

    /// Number of operands the operator takes: 1 for `Not`, `Negate` and
    /// `Size`, 2 for everything else.
    pub fn arity(&self) -> i32 {
        if matches!(self, Self::Not | Self::Negate | Self::Size) {
            1
        } else {
            2
        }
    }

    /// Binding strength of the operator; a larger value binds tighter.
    ///
    /// Levels, loosest first: set operators (1), logical connectives (2),
    /// comparisons (3), additive (4), multiplicative (5), unary operators (7).
    pub fn precedence(&self) -> i32 {
        match self {
            Self::Not | Self::Negate | Self::Size => 7,

            Self::Multiply | Self::Divide | Self::Modulo => 5,

            Self::Plus | Self::Minus => 4,

            Self::Equal
            | Self::NotEqual
            | Self::LessThan
            | Self::GreaterThan
            | Self::LessThanEqual
            | Self::GreaterThanEqual => 3,

            Self::Or | Self::And | Self::Implies => 2,

            Self::Contains
            | Self::NotContains
            | Self::Subset
            | Self::StrictSubset
            | Self::Superset
            | Self::StrictSuperset
            | Self::Union
            | Self::Intersection
            | Self::Difference
            | Self::SymmetricDifference => 1,
        }
    }
}
103 changes: 103 additions & 0 deletions src/parsing/parser.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
use num_bigint::BigInt;
use super::lexer::Token;
use super::Operator;

/// Parses a complete token stream into an expression tree.
///
/// Takes a slice rather than `&Vec<Token>` so any contiguous run of tokens can
/// be parsed; existing callers passing `&Vec<Token>` coerce automatically.
/// Returns `None` when the tokens do not form an expression.
pub fn parse(tokens: &[Token]) -> Option<AstNode> {
    parse_expression(tokens)
}

/// Parses `tokens` as an expression by splitting at the operator chosen by
/// `lowest_op` and recursing into the operand slices.
///
/// When no operator is found outside parentheses, the tokens are handed to
/// `parse_atom`. Returns `None` when any operand fails to parse; a missing
/// right-hand operand (for example a trailing operator) is reported this way
/// instead of panicking.
fn parse_expression(tokens: &[Token]) -> Option<AstNode> {
    let lowest_index = match lowest_op(tokens) {
        Some(index) => index,
        None => return parse_atom(tokens),
    };

    let op = match &tokens[lowest_index] {
        Token::Operator(op) => *op,
        // `lowest_op` only ever returns indices of operator tokens.
        _ => unreachable!("Expected operator token"),
    };

    // Both arities need the operand to the right of the operator.
    let right = Box::new(parse_expression(&tokens[lowest_index + 1..])?);

    if op.arity() == 1 {
        // NOTE(review): tokens to the left of a unary operator are not
        // inspected here.
        Some(AstNode::UnaryExpression(op, right))
    } else {
        let left = parse_expression(&tokens[..lowest_index])?;
        Some(AstNode::BinaryExpression(Box::new(left), op, right))
    }
}

/// Returns the index of the operator at which an expression should be split,
/// or `None` when there is no operator outside parentheses.
///
/// Only operators at parenthesis depth zero are considered, and the one with
/// the lowest precedence wins. Ties are broken by arity:
/// * binary operators pick the rightmost candidate, which makes them
///   left-associative (`a - b + c` splits at `+`);
/// * unary operators pick the leftmost candidate, so a chain of prefix
///   operators such as `¬ ¬ a` splits at the first one and the rest stays in
///   its operand instead of being discarded.
fn lowest_op(tokens: &[Token]) -> Option<usize> {
    // (index, precedence) of the best candidate seen so far.
    let mut best: Option<(usize, i32)> = None;
    let mut depth = 0;
    for (i, token) in tokens.iter().enumerate() {
        match token {
            Token::Operator(op) if depth == 0 => {
                let precedence = op.precedence();
                let replaces = match best {
                    None => true,
                    // Strict comparison keeps the leftmost prefix operator.
                    Some((_, lowest)) if op.arity() == 1 => precedence < lowest,
                    // <= because we want the rightmost binary operator
                    Some((_, lowest)) => precedence <= lowest,
                };
                if replaces {
                    best = Some((i, precedence));
                }
            }
            // Operators nested inside parentheses belong to a sub-expression.
            Token::Operator(_) => {}
            Token::OpenParen => depth += 1,
            Token::CloseParen => depth -= 1,
            _ => {}
        }
    }
    best.map(|(index, _)| index)
}

/// Parses the leading token of `tokens` as an atom: a number, a variable, or a
/// parenthesised sub-expression.
///
/// Returns `None` for an empty slice, for any other leading token, and for an
/// opening parenthesis with no matching close (which previously indexed past
/// the end of the slice and panicked). Tokens after the atom are not
/// inspected.
fn parse_atom(tokens: &[Token]) -> Option<AstNode> {
    match tokens.first()? {
        Token::Number(n) => Some(AstNode::Number(n.clone())),
        Token::Identifier(v) => Some(AstNode::Variable(v.clone())),
        Token::OpenParen => {
            // Find the parenthesis that closes the one at index 0.
            let mut depth = 1;
            let mut close = None;
            for (i, token) in tokens.iter().enumerate().skip(1) {
                match token {
                    Token::OpenParen => depth += 1,
                    Token::CloseParen => {
                        depth -= 1;
                        if depth == 0 {
                            close = Some(i);
                            break;
                        }
                    }
                    _ => {}
                }
            }
            // Unbalanced input has no closing index and yields `None`.
            parse_expression(&tokens[1..close?])
        }
        _ => None,
    }
}

/// A node of the parsed expression tree.
///
/// `PartialEq` is derived so parse results can be compared structurally (for
/// example with `assert_eq!` in tests).
#[derive(Debug, Clone, PartialEq)]
pub enum AstNode {
    /// An integer literal.
    Number(BigInt),
    /// A reference to a name.
    Variable(String),
    /// A one-operand operator applied to its operand.
    UnaryExpression(Operator, Box<AstNode>),
    /// Left operand, operator, right operand.
    BinaryExpression(Box<AstNode>, Operator, Box<AstNode>),
}

impl AstNode {
    /// Calls `f` on this node and then on every descendant (parent before
    /// children, left operand before right), returning a structural copy of
    /// the tree.
    pub fn walk(&self, f: &dyn Fn(&AstNode)) -> AstNode {
        f(self);
        match self {
            // Leaves have no children to visit.
            AstNode::Number(_) | AstNode::Variable(_) => self.clone(),
            AstNode::UnaryExpression(op, node) => {
                let operand = node.walk(f);
                AstNode::UnaryExpression(*op, Box::new(operand))
            }
            AstNode::BinaryExpression(left, op, right) => {
                let lhs = left.walk(f);
                let rhs = right.walk(f);
                AstNode::BinaryExpression(Box::new(lhs), *op, Box::new(rhs))
            }
        }
    }
}