# Grammar


> This module contains the spannerlog grammar, plus utilities that help developers assert that the AST they received matches the grammar
they expect to work with.

> These assertions are useful as a general safety check, and also for finding the places in the code that need to change
should the spannerlog grammar be changed.

In [None]:
#| default_exp grammar

In [None]:
#| hide
from nbdev.showdoc import show_doc

%load_ext autoreload
%autoreload 2

In [None]:
#| export
from typing import no_type_check, Set, Sequence, Any, Callable
from typing import Sequence, Dict
from lark import Lark,Token, Tree, Transformer
import yaml
import networkx as nx

import logging
logger = logging.getLogger(__name__)
from graph_rewrite import rewrite,rewrite_iter,draw

from spannerlib.utils import checkLogs


## Formal grammar

In [None]:
#| export
# Lark grammar for spannerlog. It is kept in a raw string so that the
# backslashes used in the terminal definitions reach Lark unmodified.
# The entry rule is `start`: statements separated by newlines, where a
# statement is a relation declaration, add/remove fact, rule, query or assignment.
SpannerlogGrammar = r"""
// basic text types
%import common (INT,FLOAT,CNAME,WS,WS_INLINE,NEWLINE,SH_COMMENT)
%ignore WS_INLINE
%ignore SH_COMMENT

_LINE_OVERFLOW_ESCAPE: "\\" NEWLINE
%ignore _LINE_OVERFLOW_ESCAPE

_SEPARATOR: (WS_INLINE | _LINE_OVERFLOW_ESCAPE)+
_STRING_INNER: /.+?/
_STRING_ESC_INNER: _STRING_INNER /(?<!\\)(\\\\)*?/

ESCAPED_STRING : "\"" _STRING_ESC_INNER "\""
                | "'" _STRING_ESC_INNER "'"

string: ESCAPED_STRING

_NEWLINE: NEWLINE

TRUE: "True"
FALSE: "False"

// inline version of bools in cases we dont need to save the value
_TRUE: "True"
_FALSE: "False"

bool: TRUE | FALSE
int: INT
   | "-" INT -> int_neg
float: FLOAT
   | "-" FLOAT -> float_neg

// basic terms

relation_name: CNAME
agg_name: CNAME
var_name: CNAME
free_var_name : CNAME

?const_term: string
        | float
        | int
        | bool
        | "$" var_name

aggregated_free_var: (agg_name "(" free_var_name ")")


?term: const_term
  | free_var_name
  | aggregated_free_var
     


?decl_term: "str" -> decl_string
        | "float" -> decl_float
        | "int" -> decl_int
        | "bool" -> decl_bool


// lists of terms and relations

free_var_name_list: free_var_name ("," free_var_name)*
decl_term_list: decl_term ("," decl_term)*

term_list: term ("," term)*

relation: relation_name "(" term_list ")"
ie_relation: relation_name "(" term_list ")" "->" "(" term_list ")"

// rules


?rule_body_relation: relation
                   | ie_relation

rule_head: relation_name "(" term_list ")"

rule_body_relation_list: rule_body_relation ("," rule_body_relation)*

rule: rule_head "<-" rule_body_relation_list

// statements 

relation_declaration: "new" relation_name "(" decl_term_list ")"
add_fact: relation_name "(" term_list ")"
        | "+" relation_name "(" term_list ")"

remove_fact: "-" relation_name "(" term_list ")" 

query: "?" relation_name "(" term_list ")"

assignment: var_name "=" const_term
        | var_name "=" var_name
        | var_name "=" "read" "(" string ")" -> read_assignment
        | var_name "=" "read" "(" var_name ")" -> read_assignment

?statement: relation_declaration
          | add_fact
          | remove_fact
          | rule
          | query
          | assignment

start: (_NEWLINE)* (statement (_NEWLINE)+)* (statement)?
"""

In [None]:
# LALR parser built once from the grammar, with no explicit start symbol.
# Note: parse_spannerlog and reconstruct build their own parsers instead of using this one.
SpannerlogParser = Lark(SpannerlogGrammar, parser='lalr')


## Manipulating the AST

In [None]:
#| export
import itertools
def lark_to_nx_aux(tree,node_id,g,counter):
    """Recursively copy a lark subtree into the graph `g`, rooted at `node_id`.

    A `Token` becomes a node carrying its text under 'val'. A `Tree` becomes a
    node carrying its rule name under 'type' (or under 'val' when it has no
    children), plus one outgoing edge per child whose 'idx' attribute is the
    child's position. Objects that are neither are left untouched here.

    Args:
        tree: the lark `Token` or `Tree` to copy
        node_id: graph id to use for `tree`
        g: graph, mutated in place
        counter: iterator yielding fresh node ids for the children
    """
    if isinstance(tree, Token):
        g.add_node(node_id, val=tree.value)
        return
    if not isinstance(tree, Tree):
        return

    # the rule name may itself be a Token; store its plain text in that case
    label = tree.data.value if isinstance(tree.data, Token) else tree.data
    attr_name = "type" if tree.children else "val"
    g.add_node(node_id, **{attr_name: label})

    for position, subtree in enumerate(tree.children):
        subtree_id = next(counter)
        g.add_edge(node_id, subtree_id, idx=position)
        lark_to_nx_aux(subtree, subtree_id, g, counter)
            


def lark_to_nx(t):
    """Convert a lark tree into a networkx directed graph.

    Node ids are consecutive integers drawn from a counter, the root being
    the first one. The label of a node that has children is stored under the
    key 'type'; the label/text of a leaf is stored under the key 'val'.

    Args:
        t (lark.Tree): lark tree

    Returns:
        nx.DiGraph: the nx graph
    """
    id_source = itertools.count()
    graph = nx.DiGraph()
    root_id = next(id_source)
    lark_to_nx_aux(t, root_id, graph, id_source)
    return graph
    




In [None]:
#| export
def parse_spannerlog(spannerlog_code: str, # code to parse
    start='start', # start symbol to parse from 
    split_statements=False # whether to split the code into multiple statements, only makes sense if parsing from the start
    ):
    """Parse spannerlog code with a freshly built LALR parser.

    Returns a `(networkx graph, lark tree)` pair for the whole parse, or, when
    `split_statements` is true, a list of such pairs with one entry per child
    of the parsed tree.
    """
    lark_tree = Lark(SpannerlogGrammar, parser='lalr', start=start).parse(spannerlog_code)

    if not split_statements:
        return lark_to_nx(lark_tree), lark_tree

    return [(lark_to_nx(statement), statement) for statement in lark_tree.children]
    

In [None]:
# Parse a relation declaration and two rules, asking for one result per statement.
gs = parse_spannerlog("""
new body1(str,str)
head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)
head(X,min(Y))<-body1(X,Y)
""",split_statements=True)

# each entry of gs is a (networkx graph, lark tree) pair; draw every graph
for nx_tree,lark_tree in gs:
    draw(nx_tree)

In [None]:
#| export
from lark.reconstruct import Reconstructor
def reconstruct(tree):
    """Turn a lark tree back into spannerlog source text.

    A parser is built from `SpannerlogGrammar` (start symbol 'start',
    `maybe_placeholders=False`) and handed to lark's `Reconstructor`,
    which regenerates the text for `tree`.
    """
    grammar_parser = Lark(
        SpannerlogGrammar,
        parser='lalr',
        start='start',
        maybe_placeholders=False,
    )
    rebuilder = Reconstructor(grammar_parser)
    return rebuilder.reconstruct(tree)

In [None]:
# Round-trip check: reconstructing the lark tree of a parsed statement gives back its source text.
assert reconstruct(gs[0][1]) == 'new body1(str,str)'
assert reconstruct(gs[1][1]) == 'head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)'

## Grammar Tests Utils

In [None]:
# testing utils
def tree_to_json(node):
    """Convert a lark node into plain nested Python data.

    A `Token` maps to its string value. Any other node maps to a one-key dict
    `{label: payload}`, where `label` is the node's rule name and `payload` is
    the converted child when there is exactly one child, otherwise the list
    of converted children (possibly empty).

    Args:
        node: a lark `Token` or `Tree`; other objects are accepted as long as
            they expose `type` and `children` attributes.

    Returns:
        str for tokens, dict for every other node.

    Raises:
        AttributeError: if `node` is neither a `Token` nor a `Tree` and lacks
            a `type` or `children` attribute.
    """
    # lazy %-formatting: the node is only rendered when debug logging is enabled
    logger.debug('casting the following lark node to json: %s', node)
    if isinstance(node, Token):
        return node.value

    if isinstance(node, Tree):
        label = node.data
        # the rule name may itself be a Token; use its plain text
        if isinstance(label, Token):
            label = label.value
    else:
        # Non-lark node: use `type.value` when present, else `type` itself.
        # A missing `type` still raises AttributeError, exactly as before.
        label = node.type
        label = getattr(label, 'value', label)

    children = [tree_to_json(child) for child in node.children]
    # a single child is unwrapped so the output reads `{rule: child}` rather than `{rule: [child]}`
    return {label: children[0] if len(children) == 1 else children}

def tree_to_yaml(node):
    """Serialize a lark node to a YAML string, going through `tree_to_json`."""
    as_json = tree_to_json(node)
    return yaml.dump(as_json)

def assert_grammar(start,text,expected_yaml=None):
    """Parse `text` from the `start` symbol and compare the tree to `expected_yaml`.

    When `expected_yaml` is None the YAML form of the parse tree is printed
    instead of being checked. Otherwise `expected_yaml` is loaded with
    `yaml.safe_load` and must equal the JSON form of the parse tree.

    Returns:
        the `(networkx graph, lark tree)` pair produced by `parse_spannerlog`.
    """
    parsed = parse_spannerlog(text, start=start, split_statements=False)
    lark_tree = parsed[1]
    yaml_tree = tree_to_yaml(lark_tree)
    json_tree = tree_to_json(lark_tree)

    if expected_yaml is not None:
        expected = yaml.safe_load(expected_yaml)
        assert json_tree == expected, f'got unexpected parse results\n{yaml_tree}\nexpected\n{expected_yaml}'
    else:
        print(yaml_tree)

    return parsed


In [None]:
isinstance(lark_tree,Tree)

True

In [None]:
lark_tree.data

Token('RULE', 'rule')

In [None]:
# Parse a single rule, starting from the `rule` symbol, and print its YAML form.
nx_tree,lark_tree = parse_spannerlog('head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)',start='rule',split_statements=False)
# the JSON result is discarded here; only the YAML dump below is printed
tree_to_json(lark_tree)
print(tree_to_yaml(lark_tree))

rule:
- rule_head:
  - relation_name: head
  - term_list:
    - free_var_name: X
    - free_var_name: Y
    - free_var_name: W
- rule_body_relation_list:
  - relation:
    - relation_name: body1
    - term_list:
      - free_var_name: X
      - free_var_name: Z
  - relation:
    - relation_name: body2
    - term_list:
      - free_var_name: Z
      - free_var_name: Y
  - ie_relation:
    - relation_name: ie_1
    - term_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: Z
    - term_list:
        free_var_name: W



## Grammar Tests

In [None]:
tree = assert_grammar(
      'start',
      '''
a=3.5
b=c
a="hello world"
a='hello world'
a="hello \
world"
      ''',
    )

#TODO from here make tests with all new primitive types in micropasses
# Bool, Float, Int, String negative int negative float
# TODO add schema merging to utils and use schema utils for type consistency checks

start:
- assignment:
  - var_name: a
  - float: '3.5'
- assignment:
  - var_name: b
  - var_name: c
- assignment:
  - var_name: a
  - string: '"hello world"'
- assignment:
  - var_name: a
  - string: '''hello world'''
- assignment:
  - var_name: a
  - string: '"hello world"'



In [None]:
tree = assert_grammar(
    'start',
    """
    B(1, "2", -3.5,False)
    """,
    """
start:
  add_fact:
  - relation_name: B
  - term_list:
    - int: '1'
    - string: '"2"'
    - float_neg: '3.5'
    - bool: 'False'

    """
)



In [None]:
tree = assert_grammar(
      'rule',
      'head(X,Y,W)<-body1(X,-1),body2(z,-3.5),ie_1(X,$a,Z)->(W)',
"""
rule:
- rule_head:
  - relation_name: head
  - term_list:
    - free_var_name: X
    - free_var_name: Y
    - free_var_name: W
- rule_body_relation_list:
  - relation:
    - relation_name: body1
    - term_list:
      - free_var_name: X
      - int_neg: '1'
  - relation:
    - relation_name: body2
    - term_list:
      - free_var_name: z
      - float_neg: '3.5'
  - ie_relation:
    - relation_name: ie_1
    - term_list:
      - free_var_name: X
      - var_name: a
      - free_var_name: Z
    - term_list:
        free_var_name: W"""
    )


In [None]:
tree = assert_grammar(
      'rule',
      'head(X,Y,W)<-body1(X,Z),body2(Z,Y),ie_1(X,Y,Z)->(W)',
    '''
  rule:
  - rule_head:
    - relation_name: head
    - term_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: W
  - rule_body_relation_list:
    - relation:
      - relation_name: body1
      - term_list:
        - free_var_name: X
        - free_var_name: Z
    - relation:
      - relation_name: body2
      - term_list:
        - free_var_name: Z
        - free_var_name: Y
    - ie_relation:
      - relation_name: ie_1
      - term_list:
        - free_var_name: X
        - free_var_name: Y
        - free_var_name: Z
      - term_list:
          free_var_name: W
  ''')

In [None]:
tree = assert_grammar(
    'rule',
    'head(X,sum(Y),min(W))<-body1(X,Y,Z)',
    """
rule:
- rule_head:
  - relation_name: head
  - term_list:
    - free_var_name: X
    - aggregated_free_var:
      - agg_name: sum
      - free_var_name: Y
    - aggregated_free_var:
      - agg_name: min
      - free_var_name: W
- rule_body_relation_list:
    relation:
    - relation_name: body1
    - term_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: Z
    """
    )

In [None]:
tree = assert_grammar(
    'rule',
    'head(X,sum(Y),min(W),"s",$x)<-body1(X,Y,Z)',
    """
rule:
- rule_head:
  - relation_name: head
  - term_list:
    - free_var_name: X
    - aggregated_free_var:
      - agg_name: sum
      - free_var_name: Y
    - aggregated_free_var:
      - agg_name: min
      - free_var_name: W
    - string: '"s"'
    - var_name: x
- rule_body_relation_list:
    relation:
    - relation_name: body1
    - term_list:
      - free_var_name: X
      - free_var_name: Y
      - free_var_name: Z
    """
    )

## Example rewriting of the AST

In [None]:
# Parse one rule with the default start symbol, then iterate over
# relation -> relation_name -> child matches and print the child's value.
g,lark_t = parse_spannerlog('head(X,Y,W)<-body1(X,1),body2(1,Y),ie_1(X,Y,1)->(W)')
# NOTE(review): lark_to_nx stores the label of a node that has children under 'type', not 'val';
# this pattern filters on `val` while the next cell filters on `type` — confirm it matches as intended.
for match in rewrite_iter(g,lhs='''rel[val:str="relation"]->z[val:str="relation_name"]->y'''):
    print(match['y']['val'])
draw(g,direction='LR')

In [None]:
#TODO currently we can't get all children of a node at once, so we can't make the list of free vars using rhs
# Instead, collect the free variable names on each term_list node, one match at a time.
for match in rewrite_iter(g,lhs='''terms[type="term_list"]->var[type="free_var_name"]->val''',
                          p='terms[type]',):
        # extend the list kept under 'free_vars' on the matched term_list node (starting from [] if absent)
        free_var_list = match['terms'].get('free_vars',[])
        free_var_list.append(match['val']['val'])
        match['terms']['free_vars'] = free_var_list

draw(g,direction='LR')
    
    

In [None]:
#|hide
import nbdev; nbdev.nbdev_export()
     