# Grammar


> This module contains the spannerlog grammar, plus utilities that help developers assert that the AST they received matches the grammar
that they expect to work with.

> These assertions are useful as a general safety check, and also for finding places in the code that need to change
should the spannerlog grammar change.

In [None]:
#| default_exp grammar

In [None]:
#| hide
from nbdev.showdoc import show_doc

%load_ext autoreload
%autoreload 2

In [None]:
#| export
from typing import no_type_check, Set, Sequence, Any, Callable
from typing import Sequence, Dict
from lark import Lark,Token, Tree, Transformer
import yaml
import networkx as nx

import logging
logger = logging.getLogger(__name__)
from graph_rewrite import rewrite,rewrite_iter,draw

from spannerlib.utils import checkLogs, UniqueId


## Formal grammar

In [None]:
#| export
# Lark grammar for spannerlog, kept as a raw string so the backslashes in the
# terminal definitions reach Lark unchanged. A program (`start`) is a sequence
# of newline-separated statements: relation declarations, added/removed facts,
# rules, queries and assignments.
SpannerlogGrammar = r"""
start: (_NEWLINE)* (statement (_NEWLINE)+)* (statement)?

?statement: relation_declaration
          | add_fact
          | remove_fact
          | rule
          | query
          | assignment

assignment: var_name "=" string
          | var_name "=" span
          | var_name "=" int
          | var_name "=" var_name
          | var_name "=" "read" "(" string ")" -> read_assignment
          | var_name "=" "read" "(" var_name ")" -> read_assignment

relation_declaration: "new" _SEPARATOR relation_name "(" decl_term_list ")"

decl_term_list: decl_term ("," decl_term)*

?decl_term: "str" -> decl_string
          | "span" -> decl_span
          | "int" -> decl_int

rule: rule_head "<-" rule_body_relation_list

rule_head: relation_name "(" free_var_name_list ")"

rule_body_relation_list: rule_body_relation ("," rule_body_relation)*

?rule_body_relation: relation
                   | ie_relation

relation: relation_name "(" term_list ")"

ie_relation: relation_name "(" term_list ")" "->" "(" term_list ")"

query: "?" relation_name "(" term_list ")"

term_list: term ("," term)*

?term: const_term
     | free_var_name

add_fact: relation_name "(" const_term_list ")"
        | relation_name "(" const_term_list ")" "<-" _TRUE

remove_fact: relation_name "(" const_term_list ")" "<-" _FALSE

const_term_list: const_term ("," const_term)*

?const_term: span
          | string
          | int
          | var_name

span: "[" int "," int ")"

int: INT -> integer

string: STRING

free_var_name_list: free_var_name ("," free_var_name)*

relation_name: LOWER_CASE_NAME
             | UPPER_CASE_NAME

var_name: LOWER_CASE_NAME

free_var_name : UPPER_CASE_NAME

_TRUE: "True"
_FALSE: "False"

LOWER_CASE_NAME: ("_"|LCASE_LETTER) ("_"|LETTER|DIGIT)*
UPPER_CASE_NAME: UCASE_LETTER ("_"|LETTER|DIGIT)*

_COMMENT: "#" /[^\n]*/

_SEPARATOR: (_WS_INLINE | _LINE_OVERFLOW_ESCAPE)+

STRING: "\"" (_STRING_INTERNAL (_LINE_OVERFLOW_ESCAPE)+)* _STRING_INTERNAL "\""

_LINE_OVERFLOW_ESCAPE: "\\" _NEWLINE

_NEWLINE: CR? LF
CR : /\r/
LF : /\n/

LCASE_LETTER: "a".."z"
UCASE_LETTER: "A".."Z"
LETTER: UCASE_LETTER | LCASE_LETTER
DIGIT: "0".."9"
_WS_INLINE: (" "|/\t/)+
%ignore _WS_INLINE
_STRING_INTERNAL: /.*?/ /(?<!\\)(\\\\)*?/
INT: DIGIT+
%ignore _LINE_OVERFLOW_ESCAPE
%ignore _COMMENT
"""

In [None]:
# Build an LALR parser from the grammar; no start rule is passed explicitly.
# This cell is not exported; `parse_spannerlog` constructs its own parser.
SpannerlogParser = Lark(SpannerlogGrammar, parser='lalr')


## Manipulating the AST

In [None]:
import itertools

In [None]:
itertools.count

itertools.count

In [None]:
#| export
import itertools
def lark_to_nx_aux(tree,node_id,g,counter):
    """Recursively copy the lark node `tree` into graph `g` as node `node_id`.

    A Token is stored with its value under 'val'. A Tree stores its rule
    name under 'type', or under 'val' when it has no children. Ids for the
    children are drawn from `counter`, and every parent->child edge records
    the child's position under 'idx'. For any other kind of object this
    call adds nothing to the graph.
    """
    if isinstance(tree, Token):
        g.add_node(node_id, val=tree.value)
        return
    if not isinstance(tree, Tree):
        return

    # `data` may itself be a Token; unwrap it to its underlying value.
    label = tree.data.value if isinstance(tree.data, Token) else tree.data
    attr_name = "type" if tree.children else "val"
    g.add_node(node_id, **{attr_name: label})

    for position, subtree in enumerate(tree.children):
        subtree_id = next(counter)
        g.add_edge(node_id, subtree_id, idx=position)
        lark_to_nx_aux(subtree, subtree_id, g, counter)
            


def lark_to_nx(t):
    """Convert a lark tree into a networkx directed graph.

    Inner nodes keep their data under the key 'type'; leaves keep theirs
    under the key 'val'. Node ids are consecutive integers starting at 0,
    with 0 assigned to the root.

    Args:
        t (lark.Tree): lark tree

    Returns:
        nx.DiGraph: the nx graph
    """
    graph = nx.DiGraph()
    ids = itertools.count()
    root_id = next(ids)
    lark_to_nx_aux(t, root_id, graph, ids)
    return graph
    




In [None]:
#| export
import functools


@functools.lru_cache(maxsize=32)
def _cached_parser(grammar, start):
    """Build the LALR parser for `grammar` and `start`, once per distinct pair."""
    return Lark(grammar, parser='lalr', start=start)


def parse_spannerlog(spannerlog_code: str, # code to parse
                     start='start', # non terminal symbol to start parsing from
                     as_string=False, # whether to return the parse tree as a pretty string
                     as_tree=False, # whether to return as a lark Tree object
                     as_nx=True, # whether to return as an networkx graph
                     split_statements=False, # whether to return a list of individual statements
                     ):
    """Parse spannerlog code and return the parse tree in the requested form.

    The output form is chosen by the first truthy flag, checked in the order
    `as_string`, `as_tree`, `as_nx`. With `split_statements`, the conversion
    is applied to each child of the parsed root and a list is returned.
    Returns None when all three format flags are false.
    """
    # Constructing a Lark LALR parser is far more expensive than parsing a
    # short snippet, so reuse one parser per (grammar, start) pair. The
    # grammar is part of the key so a redefined grammar is never served stale.
    try:
        parser = _cached_parser(SpannerlogGrammar, start)
    except TypeError:
        # `start` is not hashable (e.g. a list of rule names): build uncached.
        parser = Lark(SpannerlogGrammar, parser='lalr', start=start)
    tree = parser.parse(spannerlog_code)
    if as_string:
        if split_statements:
            return [s.pretty() for s in tree.children]
        return tree.pretty()
    if as_tree:
        if split_statements:
            return tree.children
        return tree
    if as_nx:
        if split_statements:
            return [lark_to_nx(s) for s in tree.children]
        return lark_to_nx(tree)
    return None


In [None]:
# Parse a relation declaration followed by a rule, getting one graph per
# statement, then draw each graph.
gs = parse_spannerlog("""
new body1(str,str)
head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)
""",as_nx=True,split_statements=True)

for g in gs:
    draw(g)

## Grammar Tests

In [None]:
# Pick the graph of the last parsed statement and draw it on its own.
g=gs[-1]
draw(g)

In [None]:
# Scratch check: dict.pop returns the removed value and mutates the dict in place.
d =  {'a':1,'b':2}
d.pop('a'),d

(1, {'b': 2})

In [None]:
# testing utils
def tree_to_json(node):
    """Convert a lark node into nested plain containers.

    A Token becomes its value. Any other node becomes a one-key dict that
    maps the node's label to its converted children: the single converted
    child when there is exactly one, otherwise a list of all of them.
    """
    logger.debug(f'casting the following lark node to json: {node}')
    if isinstance(node, Token):
        return node.value

    # Trees are labelled by `data`; any other object must expose `type.value`
    # (the attribute access raises AttributeError when `type` is missing).
    label = node.data if isinstance(node, Tree) else node.type.value
    converted = [tree_to_json(child) for child in node.children]
    return {label: converted[0] if len(converted) == 1 else converted}

In [None]:
def tree_to_json(node):
    """Turn a lark node into a structure of dicts, lists and token values.

    Tokens collapse to their value. Every other node yields
    ``{label: payload}``, where the payload is the converted only child if
    the node has exactly one child, and a list of converted children otherwise.
    """
    logger.debug(f'casting the following lark node to json: {node}')
    if isinstance(node, Token):
        return node.value

    if isinstance(node, Tree):
        label = node.data
    else:
        # Non-Tree nodes are expected to carry `type.value`; a missing
        # `type` attribute surfaces as AttributeError.
        label = node.type.value

    payload = [tree_to_json(child) for child in node.children]
    if len(payload) == 1:
        payload = payload[0]
    return {label: payload}

def tree_to_yaml(node):
    """Serialize a lark node to YAML text via its `tree_to_json` form."""
    as_json = tree_to_json(node)
    return yaml.dump(as_json)


In [None]:
# testing utils


def assert_grammar(start,text,expected_yaml):
    """Parse `text` from rule `start` and assert it matches `expected_yaml`.

    The parse tree is converted with `tree_to_json` and compared to the
    structure loaded from `expected_yaml`. On mismatch the assertion message
    shows the actual tree as YAML next to the expected YAML.

    Returns the lark parse tree so callers can inspect it further.
    """
    tree = parse_spannerlog(text, start=start, as_tree=True)
    actual = tree_to_json(tree)
    wanted = yaml.safe_load(expected_yaml)
    assert actual == wanted, f'got unexpected parse results\n{tree_to_yaml(tree)}\nexpected\n{expected_yaml}'
    return tree


In [None]:
# Check the parse of a rule whose body mixes two plain relations with one
# ie_relation. A node with a single child maps straight to that child (see
# the last term_list), while nodes with several children map to a list.
tree = assert_grammar(
      'rule',
      'head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)',
    '''
  rule:
  - rule_head:
    - relation_name: head
    - free_var_name_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: W
  - rule_body_relation_list:
    - relation:
      - relation_name: body1
      - term_list:
        - free_var_name: X
        - free_var_name: Z
    - relation:
      - relation_name: body2
      - term_list:
        - free_var_name: Z
        - free_var_name: Y
    - ie_relation:
      - relation_name: ie_1
      - term_list:
        - free_var_name: X
        - free_var_name: Y
        - free_var_name: Z
      - term_list:
          free_var_name: W
  ''')

In [None]:
print(yaml)

<module 'yaml' from '/Users/dean/miniconda3/envs/span/lib/python3.11/site-packages/yaml/__init__.py'>


In [None]:
nx.tree_data

<function networkx.readwrite.json_graph.tree.tree_data(G, root, ident='id', children='children')>

In [None]:
g = parse_spannerlog('head(X,Y,W)<-body1(X,1),body2(1,Y),ie_1(X,Y,1)->(W)')
# Print the name of every plain relation in the rule. Nodes that have
# children store their rule name under 'type' (only leaves use 'val'), so the
# pattern has to filter `rel` and `z` on `type`; the leaf `y` holds the name.
for match in rewrite_iter(g,lhs='''rel[type="relation"]->z[type="relation_name"]->y'''):
    print(match['y']['val'])
draw(g,direction='LR')

In [None]:
#TODO currently we can't get all children of a node at once, so we can't make the list of free vars using rhs
# Workaround: visit every term_list -> free_var_name -> value match and
# accumulate the variable names on the term_list node under 'free_vars'.
# NOTE(review): presumably `p='terms[type]'` keeps only the `terms` node with
# its `type` attribute - confirm against the graph_rewrite documentation.
for match in rewrite_iter(g,lhs='''terms[type="term_list"]->var[type="free_var_name"]->val''',
                          p='terms[type]',):
        # start from the names collected by earlier matches on this node, if any
        free_var_list = match['terms'].get('free_vars',[])
        free_var_list.append(match['val']['val'])
        match['terms']['free_vars'] = free_var_list

draw(g,direction='LR')
    
    

In [None]:
#|hide
import nbdev; nbdev.nbdev_export()
     