# Grammar

> This module contains the spannerlog grammar, plus utilities that help developers assert that the AST they receive matches the grammar
they expect to work with.

> These assertions are useful as a general safety check, and also for finding places in the code that need to change
should the spannerlog grammar be changed.

In [None]:
#| default_exp grammar

In [None]:
#| hide
from nbdev.showdoc import show_doc

In [None]:
#| export
from typing import no_type_check, Set, Sequence, Any, Callable
from spannerlib.graphs import GraphBase, EvalState
from typing import Sequence, Dict
from lark import Lark,Token, Tree, Transformer
import yaml

NameError: name 'Any' is not defined

## Formal grammar

In [None]:
#| export
# Lark grammar for the spannerlog language, kept as a raw string so the
# backslashes in the terminal definitions reach Lark unchanged.
#
# Reading guide (Lark conventions used below):
#   * `start` is a sequence of statements separated by one or more newlines.
#   * Rules prefixed with `?` (statement, decl_term, rule_body_relation, term,
#     const_term) are inlined into their parent when they have a single child.
#   * `-> name` renames the resulting tree node, e.g. the `int` rule produces
#     nodes named `integer`, and the two `read(...)` assignment forms produce
#     `read_assignment` nodes.
#   * Terminals whose names start with `_` are filtered out of the parse tree.
#   * Inline whitespace, backslash-newline continuations and `#` comments are
#     dropped via `%ignore`.
#   * `var_name` must be a LOWER_CASE_NAME, `free_var_name` an UPPER_CASE_NAME;
#     `relation_name` accepts either.
SpannerlogGrammar = r"""
start: (_NEWLINE)* (statement (_NEWLINE)+)* (statement)?

?statement: relation_declaration
          | add_fact
          | remove_fact
          | rule
          | query
          | assignment

assignment: var_name "=" string
          | var_name "=" span
          | var_name "=" int
          | var_name "=" var_name
          | var_name "=" "read" "(" string ")" -> read_assignment
          | var_name "=" "read" "(" var_name ")" -> read_assignment

relation_declaration: "new" _SEPARATOR relation_name "(" decl_term_list ")"

decl_term_list: decl_term ("," decl_term)*

?decl_term: "str" -> decl_string
          | "span" -> decl_span
          | "int" -> decl_int

rule: rule_head "<-" rule_body_relation_list

rule_head: relation_name "(" free_var_name_list ")"

rule_body_relation_list: rule_body_relation ("," rule_body_relation)*

?rule_body_relation: relation
                   | ie_relation

relation: relation_name "(" term_list ")"

ie_relation: relation_name "(" term_list ")" "->" "(" term_list ")"

query: "?" relation_name "(" term_list ")"

term_list: term ("," term)*

?term: const_term
     | free_var_name

add_fact: relation_name "(" const_term_list ")"
        | relation_name "(" const_term_list ")" "<-" _TRUE

remove_fact: relation_name "(" const_term_list ")" "<-" _FALSE

const_term_list: const_term ("," const_term)*

?const_term: span
          | string
          | int
          | var_name

span: "[" int "," int ")"

int: INT -> integer

string: STRING

free_var_name_list: free_var_name ("," free_var_name)*

relation_name: LOWER_CASE_NAME
             | UPPER_CASE_NAME

var_name: LOWER_CASE_NAME

free_var_name : UPPER_CASE_NAME

_TRUE: "True"
_FALSE: "False"

LOWER_CASE_NAME: ("_"|LCASE_LETTER) ("_"|LETTER|DIGIT)*
UPPER_CASE_NAME: UCASE_LETTER ("_"|LETTER|DIGIT)*

_COMMENT: "#" /[^\n]*/

_SEPARATOR: (_WS_INLINE | _LINE_OVERFLOW_ESCAPE)+

STRING: "\"" (_STRING_INTERNAL (_LINE_OVERFLOW_ESCAPE)+)* _STRING_INTERNAL "\""

_LINE_OVERFLOW_ESCAPE: "\\" _NEWLINE

_NEWLINE: CR? LF
CR : /\r/
LF : /\n/

LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
LETTER: UCASE_LETTER | LCASE_LETTER
DIGIT: "0".."9"
_WS_INLINE: (" "|/\t/)+
%ignore _WS_INLINE
_STRING_INTERNAL: /.*?/ /(?<!\\)(\\\\)*?/
INT: DIGIT+
%ignore _LINE_OVERFLOW_ESCAPE
%ignore _COMMENT
"""

In [None]:
# Build an LALR parser for the whole grammar. No `start` is passed, so Lark
# uses its default start rule; this also checks that the grammar compiles.
SpannerlogParser = Lark(SpannerlogGrammar, parser='lalr')


In [None]:
#| export
# Parsers already built, keyed by (grammar text, start rule). Building the LALR
# tables is by far the most expensive step, so each parser is built only once.
_PARSER_CACHE: dict = {}


def parse_spannerlog(spannerlog_code: str,start='start',as_string: bool = False):
    """Parse spannerlog source text with the spannerlog grammar.

    Args:
        spannerlog_code: The source text to parse.
        start: Name of the grammar rule to use as the start symbol. Passing a
            rule other than ``'start'`` allows parsing a fragment, e.g. a
            single ``rule``.
        as_string: When true, return the tree's ``pretty()`` text rendering
            instead of the tree object.

    Returns:
        The Lark parse tree, or its pretty-printed string if ``as_string``
        is true.

    Raises:
        Whatever Lark raises for an invalid start rule or unparsable input.
    """
    # Lark also accepts a list of start rules; lists are unhashable, so
    # normalise to a tuple for the cache key only.
    start_key = tuple(start) if isinstance(start, list) else start
    # The grammar text is part of the key so a reassigned SpannerlogGrammar
    # never hits a parser built from the old text.
    cache_key = (SpannerlogGrammar, start_key)

    parser = _PARSER_CACHE.get(cache_key)
    if parser is None:
        parser = Lark(SpannerlogGrammar, parser='lalr', start=start)
        _PARSER_CACHE[cache_key] = parser

    tree = parser.parse(spannerlog_code)
    return tree.pretty() if as_string else tree


## Grammar Tests

In [None]:
# testing utils
def tree_to_json(node):
    """Convert a Lark parse tree into plain, YAML/JSON-friendly Python data.

    Tokens become their string value. A tree node becomes a one-key dict
    ``{node_name: children}``, where ``children`` is the converted single
    child when the node has exactly one child, and a list of converted
    children otherwise (including the zero-children case).

    Args:
        node: A ``Token``, a ``Tree``, or a tree-like object exposing
            ``type.value`` and ``children``.

    Returns:
        A ``str`` for tokens, otherwise a single-key ``dict``.

    Raises:
        TypeError: If ``node`` is none of the supported kinds.
    """
    if isinstance(node, Token):
        return node.value

    if isinstance(node, Tree):
        # Token is a str subclass, so str() yields the rule name both when
        # `data` is a Token and when it is a plain str (e.g. a hand-built Tree).
        label = str(node.data)
    elif hasattr(node, 'type'):
        label = node.type.value
    else:
        # Previously this path failed with an unrelated AttributeError.
        raise TypeError(
            f'cannot convert object of type {node.__class__.__name__} to json: '
            'expected a lark Token, a lark Tree, or a tree-like node'
        )

    children = [tree_to_json(child) for child in node.children]
    # A lone child is unwrapped so expected YAML can be written as a mapping.
    if len(children) == 1:
        return {label: children[0]}
    return {label: children}

def tree_to_yaml(node):
    """Render a parse tree as YAML text, going through `tree_to_json` first."""
    plain_data = tree_to_json(node)
    return yaml.dump(plain_data)

def assert_grammar(start,text,expected_yaml):
    """Parse `text` from rule `start` and assert it matches `expected_yaml`.

    The parse tree is converted with `tree_to_json` and compared for equality
    with the data loaded from `expected_yaml`. On mismatch the assertion
    message shows the actual tree as YAML next to the expected YAML text.

    Returns the parse tree so callers can inspect it further.
    """
    parsed = parse_spannerlog(text, start=start)
    wanted = yaml.safe_load(expected_yaml)
    actual = tree_to_json(parsed)
    assert actual == wanted, f'got unexpected parse results\n{tree_to_yaml(parsed)}\nexpected\n{expected_yaml}'
    return parsed


In [None]:
# Parse a single rule (start symbol `rule`) whose body has two plain relations
# followed by one IE relation, and compare the result with the expected YAML.
# The final `term_list` is written as a mapping rather than a list because
# tree_to_json unwraps nodes that have exactly one child.
tree= assert_grammar(
    'rule',
    'head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)',
  '''
rule:
- rule_head:
  - relation_name: head
  - free_var_name_list:
    - free_var_name: X
    - free_var_name: Y
    - free_var_name: W
- rule_body_relation_list:
  - relation:
    - relation_name: body1
    - term_list:
      - free_var_name: X
      - free_var_name: Z
  - relation:
    - relation_name: body2
    - term_list:
      - free_var_name: Z
      - free_var_name: Y
  - ie_relation:
    - relation_name: ie_1
    - term_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: Z
    - term_list:
        free_var_name: W
''')

In [None]:
# TODO: from here on, gather the remaining grammar tools from other places into this module and make the tests pass after migrating them

In [None]:
# Show the tree returned by the assert_grammar call above in Lark's indented text form.
print(tree.pretty())

rule
  rule_head
    relation_name	head
    free_var_name_list
      free_var_name	X
      free_var_name	Y
      free_var_name	W
  rule_body_relation_list
    relation
      relation_name	body1
      term_list
        free_var_name	X
        free_var_name	Z
    relation
      relation_name	body2
      term_list
        free_var_name	Z
        free_var_name	Y
    ie_relation
      relation_name	ie_1
      term_list
        free_var_name	X
        free_var_name	Y
        free_var_name	Z
      term_list
        free_var_name	W



# Ensuring Correct Structure in spannerlog's AST



When dealing with spannerlog's Abstract Syntax Tree (AST), it's crucial to ensure that the node structure conforms to the expected grammar. The `spannerlog_expected_children_names_lists` data structure maps every node type in the AST to its expected list(s) of children node names. Below is a comprehensive guide on how to safely modify the spannerlog grammar and update the code to accommodate the changes.

## Expected Children Structure for AST Nodes

For each node in the AST, `spannerlog_expected_children_names_lists` contains a list of its expected children node names. Each node type maps to a list of lists, where each internal list represents a valid set of children node names for that particular node type.

::: {.callout-note}
Some nodes in spannerlog can have variable-length children lists (e.g., `term_list`). Such nodes are not included in `spannerlog_expected_children_names_lists`.
:::
---

## Strategy for Modifying spannerlog Grammar

### 1. Assert Original Node Structure

Before any modifications, assert that each AST node retains its original, expected structure using `spannerlog_expected_children_names_lists`.

```python
# Example usage
lark_passes_utils.assert_expected_node_structure(node, spannerlog_expected_children_names_lists)
```

### 2. Modify the Grammar

Go ahead and make the changes to the spannerlog grammar file.

### 3. Run a Varied spannerlog Program

Execute a spannerlog program that uses a variety of different statements to ensure broad test coverage. Observe where your program crashes due to failed node structure assertions. Modify the code to work with the new grammar and temporarily comment out the node structure assertion(s).

Repeat this step until no crashes occur.

```python
# Temporarily comment this line
# lark_passes_utils.assert_expected_node_structure(node, spannerlog_expected_children_names_lists)
```

### 4. Uncomment the Assertion

Once you are sure the program doesn't crash with the new grammar, uncomment the node structure assertion(s).

```python
# Uncomment this line
lark_passes_utils.assert_expected_node_structure(node, spannerlog_expected_children_names_lists)
```

### 5. Update `spannerlog_expected_children_names_lists`

Finally, update the `spannerlog_expected_children_names_lists` data structure to reflect your new grammar.

---

By following this strategy, you ensure that the new grammar is functional and that the code that interacts with the AST is updated to accommodate the changes.


In [None]:
#| export
# Maps an AST node name to every accepted list of child node names.
# Each inner list is one valid shape; a node passes if its children match any of them.
spannerlog_expected_children_names_lists: Dict[str, Sequence] = {
    # --- assignments ---
    "assignment": [
        ["var_name", "string"],
        ["var_name", "integer"],
        ["var_name", "span"],
        ["var_name", "var_name"],
    ],
    "read_assignment": [
        ["var_name", "string"],
        ["var_name", "var_name"],
    ],

    # --- declarations, rules and queries ---
    "relation_declaration": [["relation_name", "decl_term_list"]],
    "rule": [["rule_head", "rule_body_relation_list"]],
    "rule_head": [["relation_name", "free_var_name_list"]],
    "relation": [["relation_name", "term_list"]],
    "ie_relation": [["relation_name", "term_list", "term_list"]],
    "query": [["relation_name", "term_list"]],

    # --- facts ---
    "add_fact": [["relation_name", "const_term_list"]],
    "remove_fact": [["relation_name", "const_term_list"]],

    # --- terms ---
    "span": [
        ["integer", "integer"],
        # An empty shape is also accepted, to support spans that were
        # already converted to a datatypes.Span instance.
        [],
    ],
    "integer": [[]],
    "string": [[]],
    "relation_name": [[]],
    "var_name": [[]],
    "free_var_name": [[]],
}