Chia-Network · matt-o-how · Aug 30, 2023 · Aug 30, 2023 · Aug 30, 2023 · Sep 1, 2023
diff --git a/src/compiler/sexp.rs b/src/compiler/sexp.rs
@@ -204,14 +204,15 @@ enum SExpParseState {
     Bareword(Srcloc, Vec<u8>), //srcloc contains the file, line, column and length for the captured form
     QuotedText(Srcloc, u8, Vec<u8>),
     QuotedEscaped(Srcloc, u8, Vec<u8>),
-    OpenList(Srcloc),
-    ParsingList(Srcloc, Rc<SExpParseState>, Vec<Rc<SExp>>),
+    OpenList(Srcloc, bool),
+    ParsingList(Srcloc, Rc<SExpParseState>, Vec<Rc<SExp>>, bool), // Rc<SExpParseState> is for the inner state of the list, bool is is_structured
     TermList(
         Srcloc,
         Option<Rc<SExp>>,   // this is the second value in the dot expression
         Rc<SExpParseState>, // used for inner parsing
         Vec<Rc<SExp>>,      // list content
     ),
+    StartStructuredList(Srcloc),
 }
 
 #[derive(Debug, PartialEq, Eq)]
@@ -535,17 +536,34 @@ impl SExp {
     }
 }
 
+fn restructure_list(mut this_list: Vec<Rc<SExp>>, srcloc: Srcloc) -> Rc<SExp> {
+    // Builds a cons tree by recursive halving; a lone element is returned as-is
+    if this_list.len() == 1 {
+        return Rc::clone(&this_list[0]);
+    }
+    if this_list.is_empty() {
+        return Rc::new(SExp::Nil(srcloc.clone())); // an empty list becomes Nil at srcloc
+    }
+    // Split at the midpoint: the first half forms the left subtree, the rest the right
+    let mid_index = this_list.len() / 2;
+    let left_subtree = restructure_list(this_list.drain(..mid_index).collect(), srcloc.clone());
+    let right_subtree = restructure_list(this_list, srcloc.clone());
+
+    Rc::new(make_cons(left_subtree, right_subtree))
+}
+
 fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -> SExpParseResult {
     // switch on our state
     match current_state {
         SExpParseState::Empty => match this_char as char {
             // we are not currently in a list
-            '(' => resume(SExpParseState::OpenList(loc)), // move to OpenList state
-            '\n' => resume(SExpParseState::Empty),        // new line, same state
+            '(' => resume(SExpParseState::OpenList(loc, false)), // move to OpenList state
+            '\n' => resume(SExpParseState::Empty),               // new line, same state
             ';' => resume(SExpParseState::CommentText),
             ')' => error(loc, "Too many close parens"),
             '"' => resume(SExpParseState::QuotedText(loc, b'"', Vec::new())), // match on "
             '\'' => resume(SExpParseState::QuotedText(loc, b'\'', Vec::new())), // match on '
+            '#' => resume(SExpParseState::StartStructuredList(loc)), // initiating a structured list
             ch => {
                 if char::is_whitespace(ch) {
                     resume(SExpParseState::Empty)
@@ -601,7 +619,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
             tcopy.push(this_char);
             resume(SExpParseState::QuotedText(srcloc.clone(), *term, tcopy))
         }
-        SExpParseState::OpenList(srcloc) => match this_char as char {
+        SExpParseState::OpenList(srcloc, is_structured) => match this_char as char {
             // we are beginning a new list
             ')' => emit(Rc::new(SExp::Nil(srcloc.ext(&loc))), SExpParseState::Empty), // create a Nil object
             '.' => error(loc, "Dot can't appear directly after begin paren"),
@@ -612,44 +630,69 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
                     srcloc.ext(&loc),
                     Rc::new(current_state), // captured state from our pretend empty state
                     vec![o],
+                    *is_structured,
                 )),
                 SExpParseResult::Resume(current_state) => resume(SExpParseState::ParsingList(
                     // we're still reading the object, resume processing
                     srcloc.ext(&loc),
                     Rc::new(current_state), // captured state from our pretend empty state
                     Vec::new(),
+                    *is_structured,
                 )),
                 SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error
             },
         },
         // We are in the middle of a list currently
-        SExpParseState::ParsingList(srcloc, pp, list_content) => {
+        SExpParseState::ParsingList(srcloc, pp, list_content, is_structured) => {
             // pp is the captured inside-list state we received from OpenList
-            match (this_char as char, pp.borrow()) {
-                ('.', SExpParseState::Empty) => resume(SExpParseState::TermList(
+            match (this_char as char, pp.borrow(), is_structured) {
+                ('.', SExpParseState::Empty, false) => resume(SExpParseState::TermList(
                     // dot notation showing cons cell
                     srcloc.ext(&loc),
                     None,
                     Rc::new(SExpParseState::Empty), // nested state is empty
                     list_content.to_vec(),
                 )),
-                (')', SExpParseState::Empty) => emit(
-                    // close list and emit it upwards as a complete entity
-                    Rc::new(enlist(srcloc.clone(), list_content)),
-                    SExpParseState::Empty,
-                ),
-                (')', SExpParseState::Bareword(l, t)) => {
+                ('.', SExpParseState::Empty, true) => {
+                    error(loc, "Dot expressions disallowed in structured lists")
+                }
+                (')', SExpParseState::Empty, _) => {
+                    if *is_structured {
+                        emit(
+                            // close list and emit it upwards as a complete entity
+                            restructure_list(list_content.to_vec(), srcloc.clone()),
+                            SExpParseState::Empty,
+                        )
+                    } else {
+                        emit(
+                            // close list and emit it upwards as a complete entity
+                            Rc::new(enlist(srcloc.clone(), list_content)),
+                            SExpParseState::Empty,
+                        )
+                    }
+                }
+                (')', SExpParseState::Bareword(l, t), _) => {
                     // you've reached the end of the word AND the end of the list, close list and emit upwards
+                    // is_structured is checked below to choose which list layout to emit
                     let parsed_atom = make_atom(l.clone(), t.to_vec());
                     let mut updated_list = list_content.to_vec();
                     updated_list.push(Rc::new(parsed_atom));
-                    emit(
-                        Rc::new(enlist(srcloc.clone(), &updated_list)),
-                        SExpParseState::Empty,
-                    )
+                    if *is_structured {
+                        emit(
+                            // close list and emit it upwards as a complete entity
+                            restructure_list(updated_list, srcloc.clone()),
+                            SExpParseState::Empty,
+                        )
+                    } else {
+                        emit(
+                            // close list and emit it upwards as a complete entity
+                            Rc::new(enlist(srcloc.clone(), &updated_list)),
+                            SExpParseState::Empty,
+                        )
+                    }
                 }
                 // analyze this character using the mock "inner state" stored in pp
-                (_, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) {
+                (_, _, _) => match parse_sexp_step(loc.clone(), pp.borrow(), this_char) {
                     //
                     SExpParseResult::Emit(o, current_state) => {
                         // add result of parse_sexp_step to our list
@@ -659,6 +702,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
                             srcloc.ext(&loc),
                             Rc::new(current_state),
                             list_copy,
+                            *is_structured,
                         );
                         resume(result)
                     }
@@ -667,6 +711,7 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
                         srcloc.ext(&loc),
                         Rc::new(rp), // store the returned state from parse_sexp_step in pp
                         list_content.to_vec(),
+                        *is_structured,
                     )),
                     SExpParseResult::Error(l, e) => SExpParseResult::Error(l, e), // propagate error upwards
                 },
@@ -779,6 +824,24 @@ fn parse_sexp_step(loc: Srcloc, current_state: &SExpParseState, this_char: u8) -
                 },
             }
         }
+        SExpParseState::StartStructuredList(l) => {
+            let new_srcloc = l.ext(&loc);
+            match this_char as char {
+                '(' => resume(SExpParseState::ParsingList(
+                    // go into a ParsingList
+                    new_srcloc,
+                    Rc::new(SExpParseState::Empty), // we have no inner state
+                    Vec::new(),
+                    true, // note that this is a special StructuredList to be processed later
+                )),
+                _ => parse_sexp_step(
+                    // if we don't see a '(' then process it as if the preceding '#' was part of a bareword
+                    loc.clone(),
+                    &SExpParseState::Bareword(loc, vec![b'#']),
+                    this_char,
+                ),
+            }
+        }
     }
 }
 
@@ -837,9 +900,14 @@ impl ParsePartialResult {
             SExpParseState::QuotedEscaped(l, _, _) => {
                 Err((l, "unterminated quoted string with escape".to_string()))
             }
-            SExpParseState::OpenList(l) => Err((l, "Unterminated list (empty)".to_string())),
-            SExpParseState::ParsingList(l, _, _) => Err((l, "Unterminated mid list".to_string())),
+            SExpParseState::OpenList(l, _) => Err((l, "Unterminated list (empty)".to_string())),
+            SExpParseState::ParsingList(l, _, _, _) => {
+                Err((l, "Unterminated mid list".to_string()))
+            }
             SExpParseState::TermList(l, _, _, _) => Err((l, "Unterminated tail list".to_string())),
+            SExpParseState::StartStructuredList(l) => {
+                Err((l, "Unclosed structured list".to_string()))
+            }
         }
     }
 }

diff --git a/src/tests/compiler/compiler.rs b/src/tests/compiler/compiler.rs
@@ -172,6 +172,87 @@ fn compile_test_6() {
     );
 }
 
+// structured list with an odd number of elements (7)
+#[test]
+fn compile_test_8() {
+    let result =
+        compile_string(&"(mod (S) (c S (q . #(2000 3000 4000 5000 6000 7000 8000))))".to_string())
+            .unwrap();
+    assert_eq!(
+        result,
+        "(2 (1 4 5 (1 (2000 3000 . 4000) (5000 . 6000) 7000 . 8000)) (4 (1) 1))".to_string()
+    );
+}
+
+// structured list with an even number of elements (4)
+#[test]
+fn compile_test_9() {
+    let result = compile_string(&"(mod (S) (c S (q . #(a b c d))))".to_string()).unwrap();
+    assert_eq!(
+        result,
+        "(2 (1 4 5 (1 (a . b) c . d)) (4 (1) 1))".to_string()
+    );
+}
+
+// '#' followed by a multi-character word instead of '(' (not a structured list)
+#[test]
+fn compile_test_10() {
+    let result = compile_string(&"(mod (S) (c S #fake))".to_string()).unwrap();
+    assert_eq!(result, "(2 (1 4 5 (1 . fake)) (4 (1) 1))".to_string());
+}
+
+// '#' followed by a single operator letter; `#a` is expected to compile to 2
+#[test]
+fn compile_test_11() {
+    let result = compile_string(&"(mod (S) (c S #a))".to_string()).unwrap();
+    assert_eq!(result, "(2 (1 4 5 (1 . 2)) (4 (1) 1))".to_string());
+}
+
+// structured list of length 1 collapses to its only element
+#[test]
+fn compile_test_12() {
+    let result = compile_string(&"(mod (S) (c S (q . #(100))))".to_string()).unwrap();
+    assert_eq!(result, "(2 (1 4 5 (1 . 100)) (4 (1) 1))".to_string());
+}
+
+// empty structured list yields nil
+#[test]
+fn compile_test_13() {
+    let result = compile_string(&"(mod (S) (c S (q . #())))".to_string()).unwrap();
+    assert_eq!(result, "(2 (1 4 5 (1)) (4 (1) 1))".to_string());
+}
+
+// structured list of length 2 becomes a single cons pair
+#[test]
+fn compile_test_14() {
+    let result = compile_string(&"(mod (S) (c S (q . #(a b))))".to_string()).unwrap();
+    assert_eq!(result, "(2 (1 4 5 (1 a . b)) (4 (1) 1))".to_string());
+}
+
+// structured list as the mod argument pattern; solution written as an explicit dotted tree
+#[test]
+fn compile_test_15() {
+    let result = run_string_maybe_opt(
+        &"(mod #(a b c) (- (+ a c) b))".to_string(),
+        &"(100 20 . 10)".to_string(),
+        true,
+    )
+    .unwrap();
+    assert_eq!(result.to_string(), "90".to_string());
+}
+
+// structured list as the mod argument pattern; solution also written as a structured list
+#[test]
+fn compile_test_16() {
+    let result = run_string_maybe_opt(
+        &"(mod #(a b c) (- (+ a c) b))".to_string(),
+        &"#(100 20 10)".to_string(),
+        true,
+    )
+    .unwrap();
+    assert_eq!(result.to_string(), "90".to_string());
+}
+
 fn run_test_1_maybe_opt(opt: bool) {
     let result = run_string_maybe_opt(
         &"(mod () (defun f (a b) (+ (* a a) b)) (f 3 1))".to_string(),

diff --git a/src/tests/compiler/srcloc.rs b/src/tests/compiler/srcloc.rs
@@ -3,6 +3,7 @@ use crate::compiler::srcloc::Srcloc;
 // _ is the start to end range.
 // . is the target range.
 // X is an overlap.
+
 // no _.
 #[test]
 fn test_overlap_1() {