Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list to tree feature #49

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
110 changes: 89 additions & 21 deletions src/compiler/sexp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,14 +204,15 @@ enum SExpParseState {
Bareword(Srcloc, Vec<u8>), //srcloc contains the file, line, column and length for the captured form
QuotedText(Srcloc, u8, Vec<u8>),
QuotedEscaped(Srcloc, u8, Vec<u8>),
OpenList(Srcloc),
ParsingList(Srcloc, Rc<SExpParseState>, Vec<Rc<SExp>>),
OpenList(Srcloc, bool),
ParsingList(Srcloc, Rc<SExpParseState>, Vec<Rc<SExp>>, bool), // Rc<SExpParseState> is for the inner state of the list, bool is is_structured
TermList(
Srcloc,
Option<Rc<SExp>>, // this is the second value in the dot expression
Rc<SExpParseState>, // used for inner parsing
Vec<Rc<SExp>>, // list content
),
StartStructuredList(Srcloc),
}

#[derive(Debug, PartialEq, Eq)]
Expand Down Expand Up @@ -535,17 +536,34 @@ impl SExp {
}
}

/// Rebuild a flat sequence of parsed forms into a balanced tree of cons cells.
///
/// * An empty sequence yields a `Nil` located at `srcloc`.
/// * A single form is returned unchanged (no wrapping cons cell).
/// * Anything longer is cut at `len / 2`: the leading forms become the left
///   branch and the remaining forms the right branch, each restructured
///   recursively and then joined with `make_cons`.
fn restructure_list(mut this_list: Vec<Rc<SExp>>, srcloc: Srcloc) -> Rc<SExp> {
    match this_list.len() {
        0 => Rc::new(SExp::Nil(srcloc)),
        1 => Rc::clone(&this_list[0]),
        count => {
            // `split_off` keeps the first `count / 2` forms in `this_list`
            // and hands back everything from that index onwards.
            let trailing = this_list.split_off(count / 2);
            let left_subtree = restructure_list(this_list, srcloc.clone());
            let right_subtree = restructure_list(trailing, srcloc);
            Rc::new(make_cons(left_subtree, right_subtree))
        }
    }
}

fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult {
// switch on our state
match current_state {
SExpParseState::Empty => match this_char as char {
// we are not currently in a list
'(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state
'\n' => resume(SExpParseState::Empty), // new line, same state
'(' => resume(SExpParseState::OpenList(loc, false)), // move to OpenList state
'\n' => resume(SExpParseState::Empty), // new line, same state
';' => resume(SExpParseState::CommentText),
')' => error(loc, "Too many close parens"),
'"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on "
'\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on '
'#' => resume(SExpParseState::StartStructuredList(loc)), // initiating a structured list
ch => {
if char::is_whitespace(ch) {
resume(SExpParseState::Empty)
Expand Down Expand Up @@ -601,7 +619,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
tcopy.push(this_char);
resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy))
}
SExpParseState::OpenList(srcloc) => match this_char as char {
SExpParseState::OpenList(srcloc, is_structured) => match this_char as char {
// we are beginning a new list
')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object
'.' => error(loc, "Dot can't appear directly after begin paren"),
Expand All @@ -612,44 +630,69 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
srcloc.ext(&loc),
Rc::new(current_state), // captured state from our pretend empty state
vec![o],
*is_structured,
)),
SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList(
// we're still reading the object, resume processing
srcloc.ext(&loc),
Rc::new(current_state), // captured state from our pretend empty state
Vec::new(),
*is_structured,
)),
SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error
},
},
// We are in the middle of a list currently
SExpParseState::ParsingList(srcloc, pp, list_content) => {
SExpParseState::ParsingList(srcloc, pp, list_content, is_structured) => {
// pp is the captured inside-list state we received from OpenList
match (this_char as char, pp.borrow()) {
('.', SExpParseState::Empty) => resume(SExpParseState::TermList(
match (this_char as char, pp.borrow(), is_structured) {
('.', SExpParseState::Empty, false) => resume(SExpParseState::TermList(
// dot notation showing cons cell
srcloc.ext(&loc),
None,
Rc::new(SExpParseState::Empty), // nested state is empty
list_content.to_vec(),
)),
(')', SExpParseState::Empty) => emit(
// close list and emit it upwards as a complete entity
Rc::new(enlist(srcloc.clone(), list_content)),
SExpParseState::Empty,
),
(')', SExpParseState::Bareword(l, t)) => {
('.', SExpParseState::Empty, true) => {
error(loc, "Dot expressions disallowed in structured lists")
}
(')', SExpParseState::Empty, _) => {
if *is_structured {
emit(
// close list and emit it upwards as a complete entity
restructure_list(list_content.to_vec(), srcloc.clone()),
SExpParseState::Empty,
)
} else {
emit(
// close list and emit it upwards as a complete entity
Rc::new(enlist(srcloc.clone(), list_content)),
SExpParseState::Empty,
)
}
}
(')', SExpParseState::Bareword(l, t), _) => {
// you've reached the end of the word AND the end of the list, close list and emit upwards
// TODO: check bool and rearrange here
let parsed_atom = make_atom(l.clone(), t.to_vec());
let mut updated_list = list_content.to_vec();
updated_list.push(Rc::new(parsed_atom));
emit(
Rc::new(enlist(srcloc.clone(), &updated_list)),
SExpParseState::Empty,
)
if *is_structured {
emit(
// close list and emit it upwards as a complete entity
restructure_list(updated_list, srcloc.clone()),
SExpParseState::Empty,
)
} else {
emit(
// close list and emit it upwards as a complete entity
Rc::new(enlist(srcloc.clone(), &updated_list)),
SExpParseState::Empty,
)
}
}
// analyze this character using the mock "inner state" stored in pp
(_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) {
(_, _, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) {
//
SExpParseResult::Emit(o, current_state) => {
// add result of parse_sexp_step to our list
Expand All @@ -659,6 +702,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
srcloc.ext(&loc),
Rc::new(current_state),
list_copy,
*is_structured,
);
resume(result)
}
Expand All @@ -667,6 +711,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
srcloc.ext(&loc),
Rc::new(rp), // store the returned state from parse_sexp_step in pp
list_content.to_vec(),
*is_structured,
)),
SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards
},
Expand Down Expand Up @@ -779,6 +824,24 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
},
}
}
SExpParseState::StartStructuredList(l) => {
let new_srcloc = l.ext(&loc);
match this_char as char {
'(' => resume(SExpParseState::ParsingList(
// go into a ParsingList
new_srcloc,
Rc::new(SExpParseState::Empty), // we have no inner state
Vec::new(),
true, // note that this is a special StructuredList to be processed later
)),
_ => parse_sexp_step(
// if we don't see a '(' then process it as if the preceding '#' was part of a bareword
loc.clone(),
&SExpParseState::Bareword(loc, vec![b'#']),
this_char,
),
}
} // SExpParseState::StartStructuredList(_) => error(loc, "Missing srcloc"),
}
}

Expand Down Expand Up @@ -837,9 +900,14 @@ impl ParsePartialResult {
SExpParseState::QuotedEscaped(l, _, _) => {
Err((l, "unterminated quoted string with escape".to_string()))
}
SExpParseState::OpenList(l) => Err((l, "Unterminated list (empty)".to_string())),
SExpParseState::ParsingList(l, _, _) => Err((l, "Unterminated mid list".to_string())),
SExpParseState::OpenList(l, _) => Err((l, "Unterminated list (empty)".to_string())),
SExpParseState::ParsingList(l, _, _, _) => {
Err((l, "Unterminated mid list".to_string()))
}
SExpParseState::TermList(l, _, _, _) => Err((l, "Unterminated tail list".to_string())),
SExpParseState::StartStructuredList(l) => {
Err((l, "Unclosed structured list".to_string()))
}
}
}
}
Expand Down
81 changes: 81 additions & 0 deletions src/tests/compiler/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,87 @@ fn compile_test_6() {
);
}

// Odd-length structured list: seven items split 3 / 4, so the shorter half lands on the left.
#[test]
fn compile_test_8() {
    let program = "(mod (S) (c S (q . #(2000 3000 4000 5000 6000 7000 8000))))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected =
        "(2 (1 4 5 (1 (2000 3000 . 4000) (5000 . 6000) 7000 . 8000)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// Even-length structured list: four items split into two equal halves.
#[test]
fn compile_test_9() {
    let program = "(mod (S) (c S (q . #(a b c d))))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1 (a . b) c . d)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// '#' that is not followed by '(' is handled as the start of an ordinary word.
#[test]
fn compile_test_10() {
    let program = "(mod (S) (c S #fake))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1 . fake)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// '#' in front of a single operator letter.
#[test]
fn compile_test_11() {
    let program = "(mod (S) (c S #a))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1 . 2)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// Structured list of length 1: the lone element is produced without a wrapping cons cell.
#[test]
fn compile_test_12() {
    let program = "(mod (S) (c S (q . #(100))))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1 . 100)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// Structured list of length 0: `#()` produces nil.
#[test]
fn compile_test_13() {
    let program = "(mod (S) (c S (q . #())))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// Structured list of length 2: the two elements form a single cons pair.
#[test]
fn compile_test_14() {
    let program = "(mod (S) (c S (q . #(a b))))".to_string();
    let compiled = compile_string(&program).unwrap();
    let expected = "(2 (1 4 5 (1 a . b)) (4 (1) 1))".to_string();
    assert_eq!(compiled, expected);
}

// Structured list as the module's argument pattern, fed with the equivalent
// hand-written tree `(100 20 . 10)`.
#[test]
fn compile_test_15() {
    let program = "(mod #(a b c) (- (+ a c) b))".to_string();
    let arguments = "(100 20 . 10)".to_string();
    let outcome = run_string_maybe_opt(&program, &arguments, true).unwrap();
    assert_eq!(outcome.to_string(), "90".to_string());
}

// Structured list as the module's argument pattern, with the arguments
// themselves also written as a structured list.
#[test]
fn compile_test_16() {
    let program = "(mod #(a b c) (- (+ a c) b))".to_string();
    let arguments = "#(100 20 10)".to_string();
    let outcome = run_string_maybe_opt(&program, &arguments, true).unwrap();
    assert_eq!(outcome.to_string(), "90".to_string());
}

fn run_test_1_maybe_opt(opt: bool) {
let result = run_string_maybe_opt(
&"(mod () (defun f (a b) (+ (* a a) b)) (f 3 1))".to_string(),
Expand Down
1 change: 1 addition & 0 deletions src/tests/compiler/srcloc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::compiler::srcloc::Srcloc;
// _ is the start to end range.
// . is the target range.
// X is an overlap.

// no _.
#[test]
fn test_overlap_1() {
Expand Down