# LHS Parsing

In [None]:
#| default_exp lhs

In [None]:
#| hide
from nbdev.showdoc import show_doc
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Overview

This module defines the grammar of the LHS that is given by the user to the *rewrite* function of the library.
The module is also responsible for parsing of the pattern sent as LHS, into a networkX graph representing the template to search.

The module converts the declarative constraints regarding the properties of the nodes and edges in the LHS, to imperative functions that are checked together with the 'condition' parameter of *rewrite*.

### Requirements

In [None]:
from lark import Lark

In [None]:
# Echo the imported class; the cell output shows where it resolves from (lark.lark.Lark).
Lark

lark.lark.Lark

In [None]:
#| export
import copy
from collections import defaultdict
from collections.abc import Callable
from typing import Any, Tuple, Union

import networkx as nx
from lark import Transformer, Lark
from lark import UnexpectedCharacters, UnexpectedToken

from graph_rewrite.core import GraphRewriteException
from graph_rewrite.core import _create_graph,  _graphs_equal, draw
from graph_rewrite.match_class import Match
from graph_rewrite.match_class import _convert_to_edge_name

### Grammar
The grammar induces the allowed syntax of a legal LHS string that can be provided by the user. 

In [None]:
#| export
# LALR parser for LHS strings; parsing starts at the `lhs` rule.
# Summary of the syntax accepted by the grammar below:
#   - vertex:      a named vertex (`a`), an indexed vertex (`a<1,2>`) or the anonymous vertex `_`,
#                  each optionally followed by an attribute list.
#   - attributes:  `[name]`, `[name: type]`, `[name = value]` or `[name: type = value]`, comma separated;
#                  type is one of int/str/bool/float, value is a FLOAT, INT, BOOLEAN or quoted STRING.
#   - connection:  `->`, `-[attributes]->`, or the multi-connection form `-N[attributes]->`.
#   - pattern:     vertices chained by connections; patterns are separated by `,`.
#   - lhs:         one group of patterns, optionally followed by `;` and a second group.
lhs_parser = Lark(r"""
    %import common.INT -> INT 
    %import common.FLOAT -> FLOAT
    %import common.ESCAPED_STRING -> STRING
    %import common.WS -> WS
    %ignore WS

    NAMED_VERTEX: /[_a-zA-Z0-9]+/
    ANONYMUS: "_"
    ATTR_NAME: /[_a-zA-Z0-9]+/
    TYPE:  "int" | "str" | "bool" | "float"
    BOOLEAN: "True" | "False"
    NATURAL_NUMBER: /[1-9][0-9]*/
    INDEX: /[0-9]+/

    value: FLOAT | INT | BOOLEAN | STRING

    attribute: ATTR_NAME [":" TYPE] ["=" value]
    attributes: "[" attribute ("," attribute)* "]"

    multi_connection: "-" NATURAL_NUMBER [attributes] "->" 
    connection: ["-" attributes]"->"
              | multi_connection
    
    index_vertex: NAMED_VERTEX "<" INDEX ("," INDEX)* ">"

    vertex: NAMED_VERTEX [attributes]
    | index_vertex [attributes]
    | ANONYMUS [attributes]

    pattern: vertex (connection vertex)*
    patterns: pattern ("," pattern)* 
    lhs: patterns [";" patterns]

    """, parser="lalr", start='lhs', debug=True)


# multi_connection: "-" NATURAL_NUMBER "+" [attributes] "->"  - setting for the "-num+->" feature

### Transformer
The transformer is designed to return the networkX graph representing the patterns received by the user.

For each branch, the appropriate method will be called with the children of the branch as its argument, and its return value will replace the matching node in the tree.

The secondary task of the transformer is to collect the node/edge type and constant node/edge value constraints, such that they are added to the 'condition' parameter to be checked later. Thus, the lhsTransformer contains a python dictionary *constraints* which accumulates the constraints from all components of the parsed graph.

In [None]:
#| export
# Type of a function that renders a parameter: it receives the current Match and may
# return a value of any type (typing.Any, not the builtin function `any`).
RenderFunc = Callable[[Match], Any]

In [None]:
#| export
# NOTE(review): nothing in this notebook reads this module-level counter —
# graphRewriteTransformer numbers anonymous vertices with its own `self.cnt`.
# Confirm that no other module imports it before removing.
cnt:int = 0 # unique id for anonymous vertices
class graphRewriteTransformer(Transformer):
    """Lark transformer that converts the parse tree of a pattern string into networkx graphs.

    Every method is named after a terminal (upper case) or a rule (lower case) of the
    grammar. Lark calls it with the already-transformed children of the matching branch
    and replaces the branch with the method's return value.

    While building the graph, the transformer also collects the type/value constraints
    of nodes and edges in `self.constraints` (name -> attribute -> (type, value)).
    `patterns` returns them together with the graph and resets the accumulator.
    """

    def __init__(self, visit_tokens: bool = True, component: str = "LHS", match: Match = None, render_funcs: dict[str, RenderFunc] = None) -> None:
        """Create a transformer.

        Args:
        - visit_tokens: forwarded to lark's Transformer.
        - component: which part of the rule is parsed; "LHS" and "P" get special treatment below.
        - match: the Match handed to render functions (see USER_VALUE).
        - render_funcs: variable name -> render function; defaults to an empty dict.
        """
        super().__init__(visit_tokens)
        # general
        self.component = component
        # RHS parameters
        self.match = match
        # A fresh dict per instance: a literal `{}` default would be shared by all transformers.
        self.render_funcs = {} if render_funcs is None else render_funcs
        # LHS parameters
        self.constraints = {}
        # running id for anonymous vertices
        self.cnt = 0

    def STRING(self, arg):
        """Strip the surrounding quotes from a string literal."""
        return arg[1:-1]

    def BOOLEAN(self, arg):
        """Convert the token text "True"/"False" to the matching bool."""
        # bool("False") is True (every non-empty string is truthy), so compare the text instead.
        return arg == "True"

    def INT(self, arg):
        """Convert an INT token to a Python int."""
        # NOTE(review): the grammar in this notebook imports common.INT, which presumably
        # matches unsigned digits only — confirm before relying on negative literals.
        return int(arg)

    def FLOAT(self, arg):
        """Convert a FLOAT token to a Python float."""
        return float(arg)

    def NATURAL_NUMBER(self, number):
        """Convert the number of duplications of a multi-connection to an int."""
        return int(number)

    def USER_VALUE(self, arg):
        """Resolve a user variable by calling its render function with the current match."""
        # Drop two characters on each side to get the variable name
        # (presumably a two-character delimiter pair defined by another grammar — verify).
        variable = arg[2:-2]
        # The rendered value is supplied by the user and can be of any type.
        return self.render_funcs[variable](self.match)

    def value(self, args):
        """Unwrap the single (already converted) value from its list."""
        return args[0]

    def attribute(self, args):
        """Return the triple (attr_name, type, value) of one attribute.

        Optional parts that were not parsed arrive as None. For component "P"
        only the name is read; type and value are set to None.

        Raises:
        - GraphRewriteException: if a type other than int/str/bool/float is given.
        """
        if self.component == "P":
            attr_name = args[0]
            attr_type, value = None, None
        else:
            attr_name, attr_type, value = args

        if attr_type is not None and attr_type not in ["int", "str", "bool", "float"]:
            raise GraphRewriteException(f"Type '{attr_type}' is not one of the types supported by the LHS parser: int, str, bool, float or None. If another type is needed, please use the condition function.")

        return (attr_name, attr_type, value)

    def attributes(self, attributes):
        """Pack a list of attribute triples into the pair (attr_names, constraints).

        For LHS the graph itself only records that the attribute exists
        (name -> (None, None)), while the (type, value) pair goes to the constraints
        dict. For every other component the (type, value) pair is stored directly
        and the constraints dict stays empty.
        """
        attr_names, constraints = {}, {}
        for attr_name, attr_type, value in attributes:
            key = str(attr_name)
            if self.component == "LHS":
                # will be added to the graph itself
                attr_names[key] = (None, None)
                # will be added to the condition function
                constraints[key] = (attr_type, value)
            else:
                attr_names[key] = (attr_type, value)

        return (attr_names, constraints)

    def multi_connection(self, args):
        """Return the attributes pair of a `-N[...]->` connection, tagged with its duplication count."""
        number, attributes = args
        if attributes is None:
            attributes = ({}, {})
        # special attribute to handle duplications during construction
        attributes[0]["$dup"] = number
        return attributes

    def connection(self, args):
        """Return (attributes pair, determinism flag) for a connection."""
        attributes = args[0]
        if attributes is None:
            attributes = ({}, {})
        # A plain connection counts as one duplication. setdefault keeps the count that
        # multi_connection already stored instead of overwriting it with 1
        # ("$dup" cannot clash with a user attribute: ATTR_NAME does not allow "$").
        attributes[0].setdefault("$dup", 1)
        return (attributes, True)

    def ANONYMUS(self, _):
        """Return a dedicated, unique name for an anonymous vertex and an empty indices list."""
        name = "_anonymous_node_" + str(self.cnt)
        self.cnt += 1
        return (name, [])

    def index_vertex(self, args):
        """Return the main name of an indexed vertex and the list of its indices."""
        # the first child is the (name, []) pair produced by NAMED_VERTEX
        main_name_tup, *numbers = args
        return (main_name_tup[0], list(numbers))

    def NAMED_VERTEX(self, name):
        """Return (name, []) for a named vertex; names with the reserved anonymous prefix are rejected."""
        if name.startswith("_anonymous_node_"):
            raise GraphRewriteException("_anonymous_node_ prefix cannot be used for a vertex name in LHS")
        return (str(name), [])

    def vertex(self, args):
        """Return (full vertex name, attribute dict) and record the vertex constraints.

        The full name is the plain name, or `name<i,j,...>` when indices were given.
        """
        vertex_tuple, *attributes = args
        name, indices_list = vertex_tuple

        # create new name
        indices = ",".join(str(num) for num in indices_list)
        new_name = str(name) if len(indices) == 0 else name + "<" + indices + ">"

        # no attributes to handle (missing optional child is None; tolerate it being absent too)
        if not attributes or attributes[0] is None:
            return (new_name, {})

        # A vertex may appear several times in the LHS, so its constraints are united.
        # We assume there cannot be contradicting constraints.
        attribute_names, constraints = attributes[0]
        if self.component == "LHS":
            self.constraints[new_name] = self.constraints.get(new_name, {}) | constraints
        return (new_name, attribute_names)

    def pattern(self, args):
        """Build a DiGraph from the chain `vertex (connection vertex)*`.

        Duplications are not supported yet, so the "$dup" marker of each
        connection is dropped. Edge constraints that carry a concrete type or
        value are recorded under the key "source->target".
        """
        # children alternate: vertex, connection, vertex, connection, ...
        first_vertex, *rest = args
        connections = rest[::2]
        vertices = [first_vertex, *rest[1::2]]

        G = nx.DiGraph()
        # each vertex is a (name, attribute dict) pair
        G.add_nodes_from(vertices)

        edge_list = []
        for i, edge in enumerate(connections):
            # edge[0] is the (attributes, constraints) pair; edge[1] (determinism flag) is ignored
            attribute_names, constraints = edge[0]
            attribute_names.pop("$dup", None)
            source, target = vertices[i][0], vertices[i + 1][0]
            edge_list.append((source, target, attribute_names))

            # add constraints - we assume an edge only appears once in LHS
            if self.component == "LHS":
                concrete = {attr: cons for attr, cons in constraints.items() if cons != (None, None)}
                if concrete:
                    self.constraints[str(source) + "->" + str(target)] = concrete

        G.add_edges_from(edge_list)
        return G

    def empty(self, _):
        """Return an empty graph (no `empty` rule exists in this notebook's grammar — presumably used by another one)."""
        return nx.DiGraph()

    def patterns(self, args):
        """Unite the graphs of all patterns and return (graph, constraints).

        Attributes of a node that appears in several patterns are merged. For an
        edge the attributes of its last occurrence are used (edges are assumed
        to appear only once). The collected constraints are handed out as a deep
        copy and the accumulator is reset for the next group of patterns.
        """
        # Node and edge attributes are kept in separate dicts, in order of first appearance.
        node_attrs, edge_attrs = {}, {}
        for graph in args:
            for node, data in graph.nodes(data=True):
                node_attrs[node] = node_attrs.get(node, {}) | data
            for source, target, data in graph.edges(data=True):
                edge_attrs[(source, target)] = data

        G = nx.DiGraph()
        G.add_nodes_from(node_attrs.items())
        G.add_edges_from((source, target, data) for (source, target), data in edge_attrs.items())

        # sent as a module output and replaces condition.
        constraints = copy.deepcopy(self.constraints)
        self.constraints = {}
        return (G, constraints)

    def lhs(self, args):
        """Return the list of (graph, constraints) pairs, without the placeholder of a missing second group."""
        return [arg for arg in args if arg is not None]


### Transformer Application
The following function applies the transformer on an LHS-formatted string provided by the user, to extract the constraints and the resulting networkx graphs. Then it stores the constraints on the matching nodes and edges of the graphs, so that they can be enforced later on together with the *condition* function supplied by the user.

In [None]:
#| export
def lhs_to_graph(lhs: str, debug: bool = False) -> Tuple[nx.DiGraph, nx.DiGraph]:
    """
    Converts a LHS string to networkx graphs that carry the parsed constraints.

    Args:
    - lhs: str - a string representing the LHS of a rule.
    - debug: bool - if True, the string is only parsed and `(parse_tree, None)` is returned instead of the graphs.

    Returns:
    - Tuple[nx.DiGraph, nx.DiGraph] - a tuple of two networkx graphs: the single nodes graph and the collections graph.
      The collections graph is empty when the LHS has no `;` section.

    Raises:
    - GraphRewriteException - if the string cannot be parsed or transformed.
    """
    try:
        parse_tree = lhs_parser.parse(lhs)
        if debug:
            return parse_tree, None

        transformer = graphRewriteTransformer(component="LHS")
        patterns_list = transformer.transform(parse_tree)  # List of (graph, constraints)

        if len(patterns_list) == 1:
            single_nodes_graph, single_nodes_constraints = patterns_list[0]
            collections_graph = nx.DiGraph()
            collections_constraints = {}
        elif len(patterns_list) == 2:
            single_nodes_graph, single_nodes_constraints = patterns_list[0]
            collections_graph, collections_constraints = patterns_list[1]
        else:
            raise GraphRewriteException("Unexpected number of pattern sets in LHS.")

        _add_constraints_to_graph(single_nodes_graph, single_nodes_constraints)
        _add_constraints_to_graph(collections_graph, collections_constraints)

        return single_nodes_graph, collections_graph

    # Exception (not BaseException) so that KeyboardInterrupt / SystemExit are not
    # converted into a parsing error; lark's errors are ordinary exceptions and are still caught.
    except Exception as e:
        raise GraphRewriteException('Unable to convert LHS: {}'.format(e)) from e


def _add_constraints_to_graph(graph: nx.DiGraph, constraints: dict):
    """
    Writes the collected constraints into the graph in place, so that every constrained
    node or edge holds attr_name -> (attr_type_str, attr_value).

    Args:
    - graph: nx.DiGraph - the graph to add constraints to.
    - constraints: dict - maps a node name, or an edge key of the form "source->target",
      to a dict of attr_name -> (attr_type_str, attr_value).
    """
    for target, per_attribute in constraints.items():
        for attr_name, constraint in per_attribute.items():
            if target not in graph.nodes:
                # not a node, so the key names an edge: "source->target"
                source, destination = target.split("->")
                graph.edges[source, destination][attr_name] = constraint
                continue
            graph.nodes[target][attr_name] = constraint


In [None]:
# Smoke test: a single named vertex parses without raising.
# The second return value (the collections graph) is not needed here.
res, _ = lhs_to_graph("a")


### Tests
Note that throughout these tests, we use the naive condition which returns True for all matches. We chose to do that since this module is all about parsing, which is not affected by the condition.
The condition will be checked appropriately in the module that actually uses it, the Matcher module.

#### Basic Connections

In [None]:
# Each case: (LHS string, expected nodes, expected edges).
basic_connection_cases = [
    ("a", ['a'], []),
    ("a->b", ['a','b'], [('a','b')]),
    # whitespace around the arrow is ignored
    ("a -> b", ['a','b'], [('a','b')]),
    ("a->b -> c", ['a','b','c'], [('a','b'),('b','c')]),
    # a vertex may repeat inside one pattern
    ("a->b -> a", ['a','b'], [('a','b'),('b','a')]),
    # anonymous vertices get dedicated, numbered names
    ("a->_->b->_",
     ['a','b','_anonymous_node_0','_anonymous_node_1'],
     [('a','_anonymous_node_0'),('_anonymous_node_0','b'),('b','_anonymous_node_1')]),
]

for lhs_string, expected_nodes, expected_edges in basic_connection_cases:
    res, _ = lhs_to_graph(lhs_string)
    expected = _create_graph(expected_nodes, expected_edges)
    assert(_graphs_equal(expected, res))

#### Attributes

In [None]:
# Node attributes: with a constant value, and as a bare name (attribute must exist).
# Each case: (LHS string, expected (type, value) stored for attribute x of node a).
node_attribute_cases = [
    ("a[x=1]->b", (None, 1)),
    ("a[x]->b", (None, None)),
]

for lhs_string, x_constraint in node_attribute_cases:
    res, _ = lhs_to_graph(lhs_string)
    expected = _create_graph([('a', {'x': x_constraint}), ('b', {})], [('a', 'b', {})])
    assert(_graphs_equal(expected, res))

In [None]:
# Edge attributes: with a constant value, and as a bare name (attribute must exist).
# Each case: (LHS string, expected (type, value) stored for attribute x of edge a->b).
edge_attribute_cases = [
    ("a-[x=1]->b", (None, 1)),
    ("a-[x]->b", (None, None)),
]

for lhs_string, x_constraint in edge_attribute_cases:
    res, _ = lhs_to_graph(lhs_string)
    expected = _create_graph([('a', {}), ('b', {})], [('a', 'b', {'x': x_constraint})])
    assert(_graphs_equal(expected, res))

# draw the graph of the last case
draw(res)

In [None]:
# single vertex with a constant attribute
res, _ = lhs_to_graph("a[x=5]")
expected = _create_graph([('a', {'x': (None, 5)})], [])
assert(_graphs_equal(expected, res))

# constant attribute on an edge
res, _ = lhs_to_graph("a-[x=5]->b")
expected = _create_graph([('a', {}), ('b', {})], [('a', 'b', {'x': (None, 5)})])
assert(_graphs_equal(expected, res))

# indexed vertex, with an untyped and a typed attribute
res, _ = lhs_to_graph("a<1,2>[x=5, y: int = 6]")
expected = _create_graph([('a<1,2>', {'x': (None, 5), 'y': ('int', 6)})], [])
assert(_graphs_equal(expected, res))

# attributes on vertices and on an edge, with extra whitespace.
# The expected graph lists the two edges of the pattern; previously it listed none.
res, _ = lhs_to_graph("a[a]-[x]->b[ b ] -> c[ c ]")
expected = _create_graph(
    [('a', {'a': (None, None)}), ('b', {'b': (None, None)}), ('c', {'c': (None, None)})],
    [('a', 'b', {'x': (None, None)}), ('b', 'c', {})],
)
assert(_graphs_equal(expected, res))

In [None]:
# The same LHS is parsed twice: once in debug mode (parse tree only) and once into a graph.
typed_relation_lhs = '''rel[val:str="relation"]->z[val:str="relation_name"]'''

t2 = lhs_to_graph(typed_relation_lhs, True)
g2, _ = lhs_to_graph(typed_relation_lhs)

draw(g2)
print(t2)

(Tree(Token('RULE', 'lhs'), [Tree(Token('RULE', 'patterns'), [Tree(Token('RULE', 'pattern'), [Tree(Token('RULE', 'vertex'), [Token('NAMED_VERTEX', 'rel'), Tree(Token('RULE', 'attributes'), [Tree(Token('RULE', 'attribute'), [Token('ATTR_NAME', 'val'), Token('TYPE', 'str'), Tree(Token('RULE', 'value'), [Token('STRING', '"relation"')])])])]), Tree(Token('RULE', 'connection'), [None]), Tree(Token('RULE', 'vertex'), [Token('NAMED_VERTEX', 'z'), Tree(Token('RULE', 'attributes'), [Tree(Token('RULE', 'attribute'), [Token('ATTR_NAME', 'val'), Token('TYPE', 'str'), Tree(Token('RULE', 'value'), [Token('STRING', '"relation_name"')])])])])])]), None]), None)


### String Type Attributes ###
Testing the parser's ability to handle attributes of type string, specifically when they are explicitly provided in the LHS string using formats such as "<value>" or other string representation methods.

In [None]:
# Using a non string value to have a basic test
res, _ = lhs_to_graph("A[attrA=1]->B")
expected = _create_graph([('A', {'attrA': (None, 1)}), 'B'], [('A', 'B', {})])
assert(_graphs_equal(expected, res))

# Using a string value
res, _ = lhs_to_graph("A[attrA=\"1\"]->B")
expected = _create_graph([('A', {'attrA': (None, '1')}), 'B'], [('A', 'B', {})])
assert(_graphs_equal(expected, res))

lhs, _ = lhs_to_graph('''rel[val="relation"]->z[val="relation_name"]''')

#### multiple patterns

In [None]:
# two patterns sharing the vertex c
res, _ = lhs_to_graph("a->b -> c, c-> d") 
expected = _create_graph(['a','b','c','d'], [('a','b'),('b','c'),('c','d')])
assert(_graphs_equal(expected, res))

# a pattern may be a single vertex
res, _ = lhs_to_graph("a->b -> c, d") 
expected = _create_graph(['a','b','c', 'd'], [('a','b'),('b','c')])
assert(_graphs_equal(expected, res))

# attributes of a vertex that appears in several patterns are united.
# The expected graph lists the three edges of the patterns; previously it listed none.
res, _ = lhs_to_graph("a->b[z] -> c[y], c[x=5]->b[r]") 
expected = _create_graph(
    [('a', {}), ('b', {'z': (None, None), 'r': (None, None)}), ('c', {'y': (None, None), 'x': (None, 5)})],
    [('a', 'b'), ('b', 'c'), ('c', 'b')],
)
assert(_graphs_equal(expected, res))

In [None]:
# An LHS with a ';' yields two graphs: the part before it becomes the single nodes graph,
# the part after it the collections graph. Constraints are kept per part, so `c` carries
# its `type` attribute only in the first graph (see the printed output).
res_single, res_collection = lhs_to_graph('c[type="course"];s[type="student"]->c') 
print(res_single.nodes.data())
print(res_collection.nodes.data())

[('c', {'type': (None, 'course')})]
[('s', {'type': (None, 'student')}), ('c', {})]


In [None]:
# An anonymous vertex in the first part is given a dedicated name (`_anonymous_node_0` in the printed output).
res_single, _ = lhs_to_graph('c[type="course"]->_;s[type="student"]->c')
print(res_single.nodes.data())

[('c', {'type': (None, 'course')}), ('_anonymous_node_0', {})]


In [None]:
# A user-chosen vertex name must not start with the prefix reserved for anonymous vertices.
try:
    res_single, _ = lhs_to_graph('c[type="course"]->_anonymous_node_0')
except GraphRewriteException as e:
    # Expected message: Unable to convert LHS: Error trying to process rule "NAMED_VERTEX": _anonymous_node_ prefix cannot be used for a vertex name in LHS
    print(e)
else:
    # without this branch the test would pass silently if the name were accepted
    raise AssertionError("expected GraphRewriteException for a vertex name with the reserved _anonymous_node_ prefix")

Unable to convert LHS: Error trying to process rule "NAMED_VERTEX":

_anonymous_node_ prefix cannot be used for a vertex name in LHS


#### Collections (two graphs, ";" delimiter)

In [None]:
# two pattern groups separated by ';' produce two independent graphs
res1, res2 = lhs_to_graph("a->b -> c, c-> d; a->b -> c, d") 
expected1 = _create_graph(['a','b','c','d'], [('a','b'),('b','c'),('c','d')])
expected2 = _create_graph(['a','b','c', 'd'], [('a','b'),('b','c')])
assert(_graphs_equal(expected1, res1))
assert(_graphs_equal(expected2, res2))

# a trailing ';' with nothing after it is rejected
try:
    lhs_to_graph("a->b -> c, c-> d;")
except GraphRewriteException as e:
    # Expected message: Unable to convert LHS: Unexpected token Token('$END', ''), Expected one of:
    # * NAMED_VERTEX
    # * ANONYMUS
    print(e)
else:
    # without this branch the test would pass silently if the string were accepted
    raise AssertionError("expected GraphRewriteException for an LHS that ends with ';'")

# a leading ';' with nothing before it is rejected
try:
    lhs_to_graph(";a->b -> c, c-> d")
except GraphRewriteException as e:
    # Expected message: Unable to convert LHS: Unexpected token Token('SEMICOLON', ';'), Expected one of:
    # * NAMED_VERTEX
    # * ANONYMUS
    print(e)
else:
    raise AssertionError("expected GraphRewriteException for an LHS that starts with ';'")



Unable to convert LHS: Unexpected token Token('$END', '') at line 1, column 17.
Expected one of: 
	* NAMED_VERTEX
	* ANONYMUS

Unable to convert LHS: Unexpected token Token('SEMICOLON', ';') at line 1, column 1.
Expected one of: 
	* NAMED_VERTEX
	* ANONYMUS
Previous tokens: [None]



In [None]:
# An attribute given in one part of the LHS must only show up in the graph of that part.
# Each case: (LHS string, nodes of the first graph, edges of the first graph,
#             nodes of the second graph, edges of the second graph).
collection_attribute_cases = [
    ('x->y[attr];x->z',
     ['x', ('y', {'attr': (None, None)})], [('x', 'y')],
     ['x', 'z'], [('x', 'z')]),
    ('x->y;x->z[attr]',
     ['x', 'y'], [('x', 'y')],
     ['x', ('z', {'attr': (None, None)})], [('x', 'z')]),
]

for lhs_string, nodes1, edges1, nodes2, edges2 in collection_attribute_cases:
    res1, res2 = lhs_to_graph(lhs_string)
    expected1 = _create_graph(nodes1, edges1)
    expected2 = _create_graph(nodes2, edges2)
    assert _graphs_equal(expected1, res1)
    assert _graphs_equal(expected2, res2)

# Export

In [None]:
#|hide
# Run nbdev's export step (presumably writes the cells marked `#| export` to the module
# named by `#| default_exp lhs` at the top of the notebook — confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()
     