# Session

In [None]:
#| default_exp session

In [None]:
#| hide
from __future__ import annotations

In [None]:
#| hide
from nbdev.showdoc import show_doc
%load_ext autoreload
%autoreload 2

In [None]:
#| export
import csv
import logging
import os
import re
from pathlib import Path
from typing import Tuple, List, Union, Optional, Callable, Type, Iterable, no_type_check, Sequence

In [None]:
#| export
from lark.lark import Lark
import networkx as nx
from pandas import DataFrame
from tabulate import tabulate
import os

In [None]:
#| export
#| output: false
from spannerlib.utils import get_base_file_path,checkLogs
from spannerlib.primitive_types import Span
from spannerlib.engine import SqliteEngine
from spannerlib.ast_node_types import AddFact, RelationDeclaration
from spannerlib.primitive_types import Span, DataTypes, DataTypeMapping
from spannerlib.engine import FALSE_VALUE, TRUE_VALUE
from spannerlib.execution import (Query, FREE_VAR_PREFIX, naive_execution)
from spannerlib.adding_inference_rules_to_term_graph import AddRulesToTermGraph
from spannerlib.optimizations_passes import RemoveUselessRelationsFromRule
from spannerlib.lark_passes import (RemoveTokens, FixStrings, CheckReservedRelationNames,
                                              ConvertSpanNodesToSpanInstances, ConvertStatementsToStructuredNodes,
                                              CheckDefinedReferencedVariables,
                                              CheckReferencedRelationsExistenceAndArity,
                                              CheckReferencedIERelationsExistenceAndArity, CheckRuleSafety,
                                              TypeCheckAssignments, TypeCheckRelations,
                                              SaveDeclaredRelationsSchemas, ResolveVariablesReferences,
                                              ExecuteAssignments, AddStatementsToNetxParseGraph, GenericPass)
#from spannerlib.graphs import TermGraph, NetxStateGraph, GraphBase, TermGraphBase
from spannerlib.symbol_table import SymbolTable, SymbolTableBase
from spannerlib.general_utils import rule_to_relation_name, string_to_span, SPAN_PATTERN, QUERY_RESULT_PREFIX
from spannerlib.passes_utils import LarkNode
from spannerlib.ie_func.json_path import JsonPath, JsonPathFull
from spannerlib.ie_func.nlp import (Tokenize, SSplit, POS, Lemma, NER, EntityMentions, CleanXML, Parse, DepParse, Coref, OpenIE, KBP, Quote, Sentiment, TrueCase)
from spannerlib.ie_func.python_regex import PYRGX, PYRGX_STRING
from spannerlib.ie_func.rust_spanner_regex import RGX, RGX_STRING, RGX_FROM_FILE, RGX_STRING_FROM_FILE
from spannerlib.utils import patch_method, get_base_file_path, get_lib_name
from spannerlib.grammar import parse_spannerlog

In [None]:
#| export
#| hide
# Default column delimiter offered by `Session.export` when writing csv files.
CSV_DELIMITER = ";"

# IE functions registered into every freshly created symbol table (see `Session.__init__`).
# ordered by rgx, json, nlp, etc.
PREDEFINED_IE_FUNCS = [PYRGX, PYRGX_STRING, RGX, RGX_STRING, RGX_FROM_FILE, RGX_STRING_FROM_FILE,
                       JsonPath, JsonPathFull,
                       Tokenize, SSplit, POS, Lemma, NER, EntityMentions, CleanXML, Parse, DepParse, Coref, OpenIE, KBP, Quote, Sentiment,
                       TrueCase]

# Used by `_infer_relation_type` to recognise string cells: a non-empty run of characters
# other than CR/LF that reaches the end of the value.
STRING_PATTERN = re.compile(r"^[^\r\n]+$")

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


In [None]:
#| export
def _infer_relation_type(row: Iterable # an iterable of values, extracted from a csv file or a dataframe
                        ) -> Sequence[DataTypes]: # Inferred type list of the given relation
    """
    Guess the relation type based on the data.
    We support both the actual types (e.g. 'Span'), and their string representation ( e.g. `"[0,8)"`).

    Every cell is classified independently, in this order:

    1. anything `int()` accepts is an integer,
    2. a `Span` object, or a string matching `SPAN_PATTERN`, is a span,
    3. a string matching `STRING_PATTERN` is a string.

    **@raise** ValueError: if there is a cell inside `row` of an illegal type.
    """
    relation_types = []
    for cell in row:
        try:
            int(cell)  # check if the cell can be converted to integer
        except (ValueError, TypeError):
            pass
        else:
            relation_types.append(DataTypes.integer)
            continue

        if isinstance(cell, Span):
            relation_types.append(DataTypes.span)
        elif not isinstance(cell, str):
            # the regex checks below only accept text; without this guard a cell such as
            # `None` would escape as a TypeError instead of the documented ValueError
            raise ValueError(f"value doesn't match any datatype: {cell}")
        elif re.match(SPAN_PATTERN, cell):
            relation_types.append(DataTypes.span)
        elif re.match(STRING_PATTERN, cell):
            relation_types.append(DataTypes.string)
        else:
            raise ValueError(f"value doesn't match any datatype: {cell}")

    return relation_types

In [None]:
# Sanity checks: integers, mixed integer/string rows, and spans given either as text or as `Span` objects.
assert _infer_relation_type([1, 2, 3]) == [ DataTypes.integer,DataTypes.integer,DataTypes.integer]
assert _infer_relation_type([1, 'a']) == [ DataTypes.integer,DataTypes.string]
assert _infer_relation_type(['[0,1)','[0, 1)',Span(1,3)]) == [DataTypes.span,DataTypes.span,DataTypes.span]

In [None]:
#| export
def _verify_relation_types(row: Iterable, expected_types: Iterable[DataTypes]) -> None:
    """
    Check that the types inferred from `row` are exactly `expected_types`.

    **@raise** Exception: if the inferred types differ from the expected ones.
    """
    inferred_types = list(_infer_relation_type(row))
    # compare list to list: a list never equals a tuple (or any other iterable) in python,
    # so comparing the raw objects would reject matching types passed in a non-list container
    if inferred_types != list(expected_types):
        raise Exception(f"row:\n{str(row)}\ndoes not match the relation's types:\n{str(expected_types)}")

In [None]:
#| export
def _text_to_typed_data(term_list: Sequence[DataTypeMapping.term], relation_types: Sequence[DataTypes]) -> List[DataTypeMapping.term]:
    """
    Convert each term to the python value matching its declared relation type.

    Terms and types are paired positionally. Span terms may be `Span` objects or their
    string form, integer terms are passed through `int()`, string terms are kept as is.

    **@raise** TypeError: if a span string cannot be parsed, or a `Span` is given where an integer is expected.
    """
    def _as_span(raw_term):
        # Span objects pass through untouched; anything else must be span text
        if isinstance(raw_term, Span):
            return raw_term
        assert isinstance(raw_term, str), "a span can only be a Span object or a string"
        parsed_span = string_to_span(raw_term)
        if parsed_span is None:
            raise TypeError(f"expected a Span, found this instead: {raw_term}")
        return parsed_span

    typed_terms: List[DataTypeMapping.term] = []
    for raw_term, expected_type in zip(term_list, relation_types):
        if expected_type == DataTypes.span:
            typed_terms.append(_as_span(raw_term))
            continue

        if expected_type == DataTypes.integer:
            if isinstance(raw_term, Span):
                raise TypeError(f"expected an int, found Span instead: {raw_term}")
            typed_terms.append(int(raw_term))
            continue

        # the only remaining legal type is a string, which needs no conversion
        assert expected_type == DataTypes.string, f"illegal type given: {expected_type}"
        typed_terms.append(raw_term)

    return typed_terms

In [None]:
#| export
def format_query_results(query: Query, # the query that was executed, and outputted `query_results`
                         query_results: List # the results after executing the aforementioned query
                         ) -> Union[DataFrame, List]: # a false value, a true value, or a dataframe representing the query + its results
    """
    Turn the raw result of a single query into something presentable.

    Results equal to `FALSE_VALUE` or `TRUE_VALUE` cannot be shown as a table and are
    returned as those constants. Any other result becomes a dataframe whose columns are
    the query's free variables; two-element tuples inside a row are converted to `Span` objects.
    """
    assert isinstance(query_results, list), "illegal results format"

    # the two table-less outcomes: no results at all, or a single empty tuple
    if query_results == FALSE_VALUE:
        return FALSE_VALUE
    if query_results == TRUE_VALUE:
        return TRUE_VALUE

    def _span_if_pair(term):
        # span tuples are converted to Span objects, everything else is kept
        if isinstance(term, tuple) and len(term) == 2:
            return Span(term[0], term[1])
        return term

    rows = [[_span_if_pair(term) for term in result_tuple] for result_tuple in query_results]

    # the free variables of the query serve as the table headers
    headers = [term
               for term, term_type in zip(query.term_list, query.type_list)
               if term_type is DataTypes.free_var_name]

    return DataFrame(data=rows, columns=headers)


In [None]:
#| export
def tabulate_result(result: Union[DataFrame, List] # the query result (free variable names are the dataframe's column names)
                    ) -> str: # a tabulated string
    """
    Render a formatted query result as text. <br>
    A dataframe is drawn as a table, for example: <br>
    ```prolog
    printing results for query 'lecturer_of(X, "abigail")':
       X
    -------
     linus
     walter
    ```
    A list is rendered without a table:

    1. **Empty list (query returned no results)**: the output is `[]`.

    2. **List with a single element (query returned a single empty tuple)**: the output is `[()]`.
    """
    if isinstance(result, DataFrame):
        # query results can be printed as a table
        return tabulate(result, headers="keys", tablefmt="presto", stralign="center", showindex=False)

    assert isinstance(result, list), "illegal result format"
    if len(result) == 0:
        return "[]"

    assert len(result) == 1, "illegal result format"
    return "[()]"


In [None]:
#| export
def queries_to_string(query_results: List[Tuple[Query, List]] # List[the Query object used in execution, the execution's results (from engine)]
                      ) -> str: # a tabulated string
    """
    Render a list of engine results as one string. Every result contributes a title line
    followed by either a table, a false value (=`[]`), or a true value (=`[tuple()]`).
    Falsy entries (e.g. `None`) are skipped.

    for example:

    ```prolog
    printing results for query 'lecturer_of(X, "abigail")':
       X
    -------
     linus
     walter
    ```
    """
    titled_blocks = []
    for entry in query_results:
        if not entry:  # drop Nones
            continue
        query, results = entry
        table = tabulate_result(format_query_results(query, results))
        # title line, then the rendered result, then a terminating newline
        titled_blocks.append(f"{QUERY_RESULT_PREFIX}'{query}':\n{table}\n")
    return "\n".join(titled_blocks)


In [None]:
#| export
class Session:
    def __init__(self, 
                 symbol_table: Optional[SymbolTableBase] = None, # symbol table to help with all semantic checks
                 parse_graph: Optional[nx.DiGraph] = None, # an AST that contains nodes which represent commands
                 term_graph: Optional[nx.DiGraph] = None): # a graph that holds all the connection between the relations
        """
        A class that serves as the central connection point between various modules in the system.

        This class takes input data and coordinates communication between different modules by sending the relevant parts
        of the input to each module. It also orchestrates the execution of micro passes and handles engine-related tasks. <br>
        Finally, it formats the results before presenting them to the user.

        """
        if symbol_table is None:
            # a fresh symbol table comes with the predefined IE functions already registered;
            # a user supplied table is used exactly as given
            self._symbol_table: SymbolTableBase = SymbolTable()
            self._symbol_table.register_predefined_ie_functions(PREDEFINED_IE_FUNCS)

        else:
            self._symbol_table = symbol_table

        # both graphs default to empty directed graphs
        self._parse_graph = nx.DiGraph() if parse_graph is None else parse_graph
        self._term_graph = nx.DiGraph() if term_graph is None else term_graph
        self._engine = SqliteEngine()

        # the passes every statement goes through, in execution order
        self._pass_stack: List[Type[GenericPass]] = [
            RemoveTokens,
            FixStrings,
            CheckReservedRelationNames,
            ConvertSpanNodesToSpanInstances,
            ConvertStatementsToStructuredNodes,
            CheckDefinedReferencedVariables,
            CheckReferencedRelationsExistenceAndArity,
            CheckReferencedIERelationsExistenceAndArity,
            #TODO agg - add here checkAggFunctionsExistanceAndArity
            CheckRuleSafety,
            TypeCheckAssignments,
            TypeCheckRelations, # TODO agg - add here type check for agg function
            SaveDeclaredRelationsSchemas,
            ResolveVariablesReferences,
            ExecuteAssignments,
            # note that AddStatementsToNetxParseGraph and AddRulesToTermGraph are mutually exclusive, only one will actually run per statement
            # TODO agg remove AddStatementsToNetxParseGraph just use the parsetree directly
            # TODO remove AddRulesToTermGraph, make it a side effect of the execution
            # AddStatementsToNetxParseGraph,
            # AddRulesToTermGraph # TODO agg - change this pass to also add the group by and aggregation operations
        ]

    def __repr__(self) -> str:
        # the symbol table's repr followed by the parse graph's repr, one per line
        return "\n".join([repr(self._symbol_table), repr(self._parse_graph)])
    
    def __str__(self) -> str:
        # NOTE(review): the section is titled "Term Graph" but the value printed is the parse
        # graph - confirm which of the two was intended before changing the output
        return f'Symbol Table:\n{str(self._symbol_table)}\n\nTerm Graph:\n{str(self._parse_graph)}'

In [None]:
#| export
#| hide
@patch_method
def _run_passes(self: Session, lark_tree: LarkNode, pass_list: Optional[list] = None) -> LarkNode:
    """
    Runs the passes in pass_list on tree, one after another.

    Each pass class is instantiated with the session's parse graph, symbol table and term
    graph. A pass that returns a tree replaces the current tree for the following passes;
    a pass that returns `None` leaves the current tree in place.

    @param lark_tree: the tree to run the passes on.
    @param pass_list: the pass classes to run, defaults to the session's pass stack.
    @return: the tree after the last pass.
    """
    #logger.debug(f"initial lark tree:\n{lark_tree.pretty()}")
    #logger.debug(f"initial term graph:\n{self._term_graph}")

    if pass_list is None:
        pass_list = self._pass_stack

    for curr_pass in pass_list:
        curr_pass_object = curr_pass(parse_graph=self._parse_graph,
                                        symbol_table=self._symbol_table,
                                        term_graph=self._term_graph)
        new_tree = curr_pass_object.run_pass(tree=lark_tree)
        if new_tree is not None:
            lark_tree = new_tree
            #logger.debug(f"lark tree after {curr_pass.__name__}:\n{lark_tree.pretty()}")
    return lark_tree

In [None]:
#| export
#| hide
@patch_method
def get_pass_stack(self: Session) -> List[Type[GenericPass]]:
    """
    @return: a shallow copy of the current pass stack, so that changing the returned
             list does not change the session's own stack.
    """
    return list(self._pass_stack)

In [None]:
show_doc(Session.get_pass_stack)

---

### Session.get_pass_stack

>      Session.get_pass_stack ()

@return: the current pass stack.

In [None]:
#| export
#| hide
@patch_method
def set_pass_stack(self: Session, user_stack: List[Type[GenericPass]] #  a user supplied pass stack
                    ) -> List[Type[GenericPass]]: # a copy of the newly installed pass stack
    """
    Sets a new pass stack instead of the current one.

    The given list is copied, so later changes to `user_stack` do not affect the session.

    **@raise** TypeError: if `user_stack` is not a list, or one of its items is not a subclass of `GenericPass`.
    """

    if not isinstance(user_stack, list):
        raise TypeError('user stack should be a list of passes')
    for pass_ in user_stack:
        # issubclass() raises its own, unrelated TypeError for objects that are not classes,
        # so rule those out first and report every bad entry with the same message
        if not (isinstance(pass_, type) and issubclass(pass_, GenericPass)):
            raise TypeError('user stack should be a subclass of `GenericPass`')

    self._pass_stack = list(user_stack)
    return self.get_pass_stack()

In [None]:
show_doc(Session.set_pass_stack)

---

### Session.set_pass_stack

>      Session.set_pass_stack
>                              (user_stack:List[Type[spannerlib.lark_passes.Gene
>                              ricPass]])

Sets a new pass stack instead of the current one.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| user_stack | List[Type[GenericPass]] | a user supplied pass stack |
| **Returns** | **List[Type[GenericPass]]** | **success message with the new pass stack** |

In [None]:
#| export
#| hide
@patch_method
def print_all_rules(self: Session, head: Optional[str] = None # if specified it will print only rules with the given head relation name
                    ) -> None:
    """
    Prints the registered rules by delegating to the term graph's `print_all_rules`.
    """
    # NOTE(review): the default term graph built in `Session.__init__` is a plain `nx.DiGraph`;
    # presumably a graph type that provides `print_all_rules` is expected here - confirm
    term_graph = self._term_graph
    term_graph.print_all_rules(head)

In [None]:
#| export
#| hide
@patch_method
def _remove_rule_relation_from_symbols_and_engine(self: Session, relation_name: str) -> None:
    """
    Drops a rule relation from the symbol table first, and then its table from the engine.

    @param relation_name: the name of the relation to remove.
    """
    symbol_table, engine = self._symbol_table, self._engine
    symbol_table.remove_rule_relation(relation_name)
    engine.remove_table(relation_name)

In [None]:
#| export
#| hide
@patch_method
def _add_imported_relation_to_engine(self: Session, relation_table: Iterable, relation_name: str, relation_types: Sequence[DataTypes]) -> None:
    """
    Adds every row of `relation_table` to the engine as a fact of `relation_name`.

    All rows are type-checked and converted before anything is sent to the engine, so an
    illegal row aborts the import without adding any fact. The relation is declared (in the
    engine and in the symbol table) only if the symbol table does not already contain it.
    """
    def _row_to_fact(row):
        # verification raises on a mismatch, which stops the import before the engine is touched
        _verify_relation_types(row, relation_types)
        return AddFact(relation_name, _text_to_typed_data(row, relation_types), relation_types)

    pending_facts = [_row_to_fact(row) for row in relation_table]

    # declare relation if it does not exist
    if not self._symbol_table.contains_relation(relation_name):
        self._engine.declare_relation_table(RelationDeclaration(relation_name, relation_types))
        self._symbol_table.add_relation_schema(relation_name, relation_types, False)

    for pending_fact in pending_facts:
        self._engine.add_fact(pending_fact)

In [None]:
#| export
#| hide
@patch_method
def send_commands_result_into_df(self: Session, commands: str # the commands to run
                                    ) -> Union[DataFrame, List]: # formatted results (possibly a dataframe)
    """
    Runs the commands without printing and returns the formatted result of their single query.

    **@raise** Exception: if the commands do not produce exactly one result.
    """
    results = self.run_commands(commands, print_results=False)
    if len(results) == 1:
        single_result = results[0]
        return format_query_results(*single_result)

    raise Exception("the commands must have exactly one output")

In [None]:
#| export
#| hide
@patch_method
def _relation_name_to_query(self: Session, relation_name: str) -> str:
    """
    Builds a query string that selects a whole relation: `?name(<v0>, <v1>, ...)`, with one
    generated free variable (`FREE_VAR_PREFIX` + position) per column of the relation's schema.
    """
    arity = len(self._symbol_table.get_relation_schema(relation_name))
    free_vars = [f"{FREE_VAR_PREFIX}{position}" for position in range(arity)]
    return f"?{relation_name}({', '.join(free_vars)})"

In [None]:
#| export
#| hide
@patch_method
def export(self: Session, query=None, # query string to export
            relation_name: str =None, # whether to export an entire relation (either extrinsic or intrinsic), cant be used together with query parameter
            csv_path=None, # whether to export to csv, by default returns as a dataframe
            delimiter: str = CSV_DELIMITER, # the delimeter to use in the csv file
        ) -> Union[DataFrame, List]:
    """Exports the given query or relation to a csv file or a dataframe.

    Exactly one of `query` and `relation_name` must be given; a relation name is turned into
    a query over the whole relation. With `csv_path` the result is handed to
    `send_commands_result_into_csv` and nothing is returned, otherwise the formatted result
    is returned.
    """
    query_given = query is not None
    relation_given = relation_name is not None

    if not query_given and not relation_given:
        raise Exception("either a query or a relation name must be specified")
    if query_given and relation_given:
        raise Exception("either a query or a relation name must be specified, not both")

    if relation_given:
        query = self._relation_name_to_query(relation_name)

    if csv_path is None:
        return self.send_commands_result_into_df(query)

    self.send_commands_result_into_csv(query,csv_path,delimiter)

In [None]:
show_doc(Session.export)

---

### Session.export

>      Session.export (query=None, relation_name:str=None, csv_path=None,
>                      delimiter:str=';')

Exports the given query or relation to a csv file or a dataframe.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query | NoneType | None | query string to export |
| relation_name | str | None | whether to export an entire relation (either extrinsic or intrinsic), cant be used together with query parameter |
| csv_path | NoneType | None | whether to export to csv, by default returns as a dataframe |
| delimiter | str | ; | the delimeter to use in the csv file |
| **Returns** | **Union[DataFrame, List]** |  |  |

In [None]:
#| export
#| hide

from spannerlib.passes_utils import ParseNodeType

@patch_method
def run_commands(self: Session, query: str, # The user's input
                    print_results: bool = True, # whether to print the results to stdout or not
                    format_results: bool = False, # if this is true, return the formatted result instead of the `[Query, List]` pair
                    ) -> (Union[List[Union[List, List[Tuple], DataFrame]], List[Tuple[Query, List]]]): # the results of every query, in a list
    """
    Generates an AST and passes it through the pass stack.

    The input is parsed with `parse_spannerlog`; every top-level statement is run through
    the session's pass stack and then dispatched, by its node name, to an engine action
    (declare a relation, add/remove a fact, declare a rule's head relation, or run a query).
    Non-None query results are collected and returned, raw or formatted with
    `format_query_results` when `format_results` is true.
    """
    query_results = []
    parse_tree = parse_spannerlog(query,start='start')
    engine = self._engine
    term_graph = self._term_graph
    for statement in parse_tree.children:
        clean_statement = self._run_passes(statement, self._pass_stack)

        # the node name selects the action, the first child is the structured statement object
        action_type=clean_statement.data.value
        action_input_value = clean_statement.children[0]
        

        # NOTE(review): `run_query` has no return statement, so a query always yields None and
        # nothing is ever appended to `query_results`.
        # NOTE(review): `plan_query` as defined in this notebook takes
        # (query, term_graph, optimization_passes=None), so `engine` is passed as
        # `optimization_passes`; `partial_fixed_point_execution` as defined in this notebook takes
        # a single parameter but is called with two - confirm the intended signatures.
        def run_query(q):
            query_plan = plan_query(q,term_graph,engine)
            result = partial_fixed_point_execution(query_plan,engine)

        # dispatch table from node name to the action performed on the statement object;
        # a rule only declares the table of its head relation here
        node_type_to_action = {
            'rule': lambda rule: engine.declare_relation_table(rule.head_relation.as_relation_declaration()),
            'relation_declaration': engine.declare_relation_table,
            'add_fact': engine.add_fact,
            'remove_fact': engine.remove_fact,
            'query': run_query
        }

        # statements with a node name that is not in the table are silently skipped
        if action_type in node_type_to_action:
            action_result = node_type_to_action[action_type](action_input_value)
        
        # NOTE(review): `action_result` is only bound when the node name is in the table above;
        # presumably `ParseNodeType.QUERY` compares equal to 'query' - confirm
        if action_type == ParseNodeType.QUERY:
            query_result = action_result
            if query_result is not None:
                query_results.append(query_result)
            if print_results:
                # `queries_to_string` drops None entries, so a None result prints an empty line
                print(queries_to_string([query_result]))

        #TODO implement plan_query and partial_fixed_point_execution before testing this
        # query_result = self._execution(parse_graph=self._parse_graph,
        #                                 symbol_table=self._symbol_table,
        #                                 spannerlog_engine=self._engine,
        #                                 term_graph=self._term_graph)

    if format_results:
        return [format_query_results(*query_result) for query_result in query_results]
    else:
        return query_results

In [None]:
show_doc(Session.run_commands)

---

### Session.run_commands

>      Session.run_commands (query:str, print_results:bool=True,
>                            format_results:bool=False)

Generates an AST and passes it through the pass stack.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| query | str |  | The user's input |
| print_results | bool | True | whether to print the results to stdout or not |
| format_results | bool | False | if this is true, return the formatted result instead of the `[Query, List]` pair |
| **Returns** | **Union[List[Union[List, List[Tuple], DataFrame]], List[Tuple[Query, List]]]** |  | **the results of every query, in a list** |

In [None]:
#| export
#| hide
@patch_method
def register(self: Session, ie_function: Callable, ie_function_name: str, in_rel: List[DataTypes],
            out_rel: Union[List[DataTypes], Callable[[int], Sequence[DataTypes]]]) -> None:
    """
    Registers an ie function in the session's symbol table, under the name `ie_function_name`.

    The arguments are forwarded unchanged, @see params in `IEFunction`'s __init__.
    """
    symbol_table = self._symbol_table
    symbol_table.register_ie_function(ie_function, ie_function_name, in_rel, out_rel)

In [None]:
show_doc(Session.register)

---

### Session.register

>      Session.register (ie_function:Callable, ie_function_name:str,
>                        in_rel:List[spannerlib.primitive_types.DataTypes], out_
>                        rel:Union[List[spannerlib.primitive_types.DataTypes],Ca
>                        llable[[int],Sequence[spannerlib.primitive_types.DataTy
>                        pes]]])

Registers an ie function.

@see params in `IEFunction`'s __init__.

In [None]:
# Two declarations and three rules; the last rule chains two user-registered IE functions.
commands = """
        new A(str, str)
        new B(str, str)
        C(X, Y) <- A(X, Y)
        D(X, Y, X) <- C(X, Y)
        D(X, Y, Z) <- A(X, "1"), B(X, Y), ID(X) -> (Y), ID2(Y)->(Z,W)  
    """

def ID(string: str):
        # yields a single output value: the input string with an `_id` suffix
        yield f'{string}_id'

def ID2(string: str):
        # yields a single output tuple of two strings derived from the input
        yield f'{string}_id2_z',f'{string}_id2_w'

session = Session()
session.register(ID,'ID', [DataTypes.string], [DataTypes.string])
session.register(ID2,'ID2', [DataTypes.string], [DataTypes.string,DataTypes.string])

# run only the pass stack (no engine actions) on each parsed statement and collect the resulting trees
parsed_commands = parse_spannerlog(commands)
clean_statements=[]
for statement in parsed_commands.children[:]:
    clean_statements.append( session._run_passes(statement))
clean_statements

[Tree(Token('RULE', 'relation_declaration'), [RelationDeclaration(A, [<DataTypes.string: 0>, <DataTypes.string: 0>])]),
 Tree(Token('RULE', 'relation_declaration'), [RelationDeclaration(B, [<DataTypes.string: 0>, <DataTypes.string: 0>])]),
 Tree(Token('RULE', 'rule'), [Rule(C(X, Y) <- A(X, Y))]),
 Tree(Token('RULE', 'rule'), [Rule(D(X, Y, X) <- C(X, Y))]),
 Tree(Token('RULE', 'rule'), [Rule(D(X, Y, Z) <- A(X, "1"), B(X, Y), ID(X) -> (Y), ID2(Y) -> (Z, W))])]

In [None]:
# Take the structured rule object out of the last processed statement for inspection.
r = clean_statements[-1].children[0]
r


Rule(D(X, Y, Z) <- A(X, "1"), B(X, Y), ID(X) -> (Y), ID2(Y) -> (Z, W))

In [None]:
r.head_relation.get_type_list()

[<DataTypes.free_var_name: 3>,
 <DataTypes.free_var_name: 3>,
 <DataTypes.free_var_name: 3>]

In [None]:
type(r.body_relation_list[0])

spannerlib.ast_node_types.Relation

In [None]:
r.body_relation_type_list

[Token('RULE', 'relation'),
 Token('RULE', 'relation'),
 Token('RULE', 'ie_relation'),
 Token('RULE', 'ie_relation')]

### Examples

::: {.callout-note collapse="true"}


##### Examples

In [None]:
# A small family relation with one derived rule; the query line is disabled inside the program text.
commands = """
    new Parent(str, str)
    Parent("Sam", "Noah")
    Parent("Noah", "Austin")
    Parent("Austin", "Stephen")

    GrandParent(G, C) <- Parent(G, M), Parent(M, C)
    #?GrandParent(X, "Austin")
    """
session = Session()
# run only the pass stack on each parsed statement and collect the resulting trees
parsed_commands = parse_spannerlog(commands)
clean_statements=[]
for statement in parsed_commands.children[:]:
    clean_statements.append( session._run_passes(statement))
clean_statements
# _ = session.run_commands(commands)
# p_graph,t_graph = session.run_commands('?GrandParent(X, "Austin")',ret_graphs=True)


[Tree(Token('RULE', 'relation_declaration'), [RelationDeclaration(Parent, [<DataTypes.string: 0>, <DataTypes.string: 0>])]),
 Tree(Token('RULE', 'add_fact'), [AddFact(Parent("Sam", "Noah"))]),
 Tree(Token('RULE', 'add_fact'), [AddFact(Parent("Noah", "Austin"))]),
 Tree(Token('RULE', 'add_fact'), [AddFact(Parent("Austin", "Stephen"))]),
 Tree(Token('RULE', 'rule'), [Rule(GrandParent(G, C) <- Parent(G, M), Parent(M, C))])]

In [None]:
parsed_commands.children

[Tree(Token('RULE', 'relation_declaration'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'decl_term_list'), [Tree('decl_string', []), Tree('decl_string', [])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Sam"')]), Tree(Token('RULE', 'string'), [Token('STRING', '"Noah"')])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Noah"')]), Tree(Token('RULE', 'string'), [Token('STRING', '"Austin"')])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Austin"')]), Tree(Token('RULE', 'stri

In [None]:
# Scratch cell mirroring the dispatch step of `run_commands`.
# NOTE(review): `clean_statement` is a local of `run_commands` and is not defined at notebook
# level (the recorded output is a NameError); `run_commands` reads `.data.value`, this reads `.data.type`.
action_type=clean_statement.data.type
action_input_value = clean_statement.children[0]
action_type,action_input_value

NameError: name 'clean_statement' is not defined

In [None]:
parsed_commands.children[0].data.value

'relation_declaration'

In [None]:
parsed_commands.children

[Tree(Token('RULE', 'relation_declaration'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'decl_term_list'), [Tree('decl_string', []), Tree('decl_string', [])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Sam"')]), Tree(Token('RULE', 'string'), [Token('STRING', '"Noah"')])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Noah"')]), Tree(Token('RULE', 'string'), [Token('STRING', '"Austin"')])])]),
 Tree(Token('RULE', 'add_fact'), [Tree(Token('RULE', 'relation_name'), [Token('UPPER_CASE_NAME', 'Parent')]), Tree(Token('RULE', 'const_term_list'), [Tree(Token('RULE', 'string'), [Token('STRING', '"Austin"')]), Tree(Token('RULE', 'stri

In [None]:
# The token type of a statement node is 'RULE' (see the recorded output); the statement kind is in `.data.value`.
clean_statement.data.type

'RULE'

In [None]:
from spannerlib.graphs import draw

In [None]:
draw(session._term_graph)

In [None]:
#| export

def plan_query(query,term_graph,optimization_passes=None):
    """
    Placeholder for query planning: only normalizes `optimization_passes` to a list so far
    and implicitly returns None.
    """
    # TODO implement; `query` and `term_graph` are not used yet
    optimization_passes = [] if optimization_passes is None else optimization_passes

    

In [None]:
def partial_fixed_point_execution(g):
    """Placeholder for the execution step: does nothing with `g` and returns None."""
    return None
# compute_node

# if the node is not part of a cycle,
# compute it by computing all of its children and performing the node's operation on them.

# if it's a part of a cycle,
    # compute the current iteration


# compute_current_iteration (i)
    # take the (i-1) value of children that are in the cycle
    # take the final value of children that are not in the cycle

    # compute the node based on the children values, assign it to the values of the ith iteration of the node
    # if the value of the node didn't change from last time, mark the node as finished and return


# TODO from here, add good drawing to the graph, separate query execution from side effect execution


In [None]:
# Load the family facts and the GrandParent rule, then query it through `run_commands`.
# NOTE(review): the recorded output of this cell is a NameError for `plan_query`.
commands = """
    new Parent(str, str)
    Parent("Sam", "Noah")
    Parent("Noah", "Austin")
    Parent("Austin", "Stephen")

    GrandParent(G, C) <- Parent(G, M), Parent(M, C)
    #?GrandParent(X, "Austin")
    """
session = Session()
_ = session.run_commands(commands)
session.run_commands('?GrandParent(X, "Austin")')


NameError: name 'plan_query' is not defined

In [None]:
# Export the result of a single query as a dataframe; the free variable `X` becomes the column name.
output = session.export(query='?GrandParent(X, "Austin")')
assert output.to_dict(orient='records') == [{'X': 'Sam'}]
output

Tree(Token('RULE', 'query'), [GrandParent(X, "Austin")])


Unnamed: 0,X
0,Sam


In [None]:
# Export a whole relation by name; the columns get the generated free variable names COL0, COL1, ...
output = session.export(relation_name='GrandParent')
assert output.to_dict(orient='records') == [{'COL0': 'Sam', 'COL1': 'Austin'}, {'COL0': 'Noah', 'COL1': 'Stephen'}]
output


Tree(Token('RULE', 'query'), [GrandParent(COL0, COL1)])


Unnamed: 0,COL0,COL1
0,Sam,Austin
1,Noah,Stephen


:::

::: {.callout-note collapse="true"}

##### Example

In [None]:
def length(string: str) -> Iterable[int]:
        # yields a single output value: the number of characters in the input string
        yield len(string)

# keyword arguments for `Session.register`: one string input, one integer output
length_dict = dict(ie_function=length,
                ie_function_name='Length',
                in_rel=[DataTypes.string],
                out_rel=[DataTypes.integer])

session = Session()
session.register(**length_dict)
# "a" is added twice on purpose; it shows up only once in the recorded query output
commands = """new string(str)
            string("a")
            string("d")
            string("a")
            string("ab")
            string("abc")
            string("abcd")

            string_length(Str, Len) <- string(Str), Length(Str) -> (Len)
            ?string_length(Str, Len)
            """
output = session.run_commands(commands)

Tree(Token('RULE', 'relation_declaration'), [string(str)])
Tree(Token('RULE', 'add_fact'), [string("a")])
Tree(Token('RULE', 'add_fact'), [string("d")])
Tree(Token('RULE', 'add_fact'), [string("a")])
Tree(Token('RULE', 'add_fact'), [string("ab")])
Tree(Token('RULE', 'add_fact'), [string("abc")])
Tree(Token('RULE', 'add_fact'), [string("abcd")])
Tree(Token('RULE', 'rule'), [string_length(Str, Len) <- string(Str), Length(Str) -> (Len)])
Tree(Token('RULE', 'query'), [string_length(Str, Len)])
printing results for query 'string_length(Str, Len)':
  Str  |   Len
-------+-------
   a   |     1
   d   |     1
  ab   |     2
  abc  |     3
 abcd  |     4



:::

In [None]:
#| export
#| hide
@patch_method
def remove_rule(self: Session, rule: str # The rule to be removed
                ) -> None:
    """
    Remove a rule from the spannerlog's engine.

    The rule is first removed from the term graph. If the term graph returns a
    truthy value, the rule's head relation is also removed from the symbol table
    and the engine.
    """
    # NOTE(review): the truthy result presumably means "this was the last rule
    # defining its head relation" - confirm against the term graph's remove_rule.
    head_left_without_rules = self._term_graph.remove_rule(rule)
    if not head_left_without_rules:
        return

    self._remove_rule_relation_from_symbols_and_engine(rule_to_relation_name(rule))

In [None]:
show_doc(Session.remove_rule)

---

### Session.remove_rule

>      Session.remove_rule (rule:str)

Remove a rule from the spannerlog's engine.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| rule | str | The rule to be removed |
| **Returns** | **None** |  |

::: {.callout-note collapse="true"}

##### Example

In [None]:
commands = """
    new parent(str, str)
    new grandparent(str, str)
    parent("Liam", "Noah")
    parent("Noah", "Oliver")
    parent("James", "Lucas")
    parent("Noah", "Benjamin")
    parent("Benjamin", "Mason")
    grandparent("Tom", "Avi")
    ancestor(X,Y) <- parent(X,Y)
    ancestor(X,Y) <- grandparent(X,Y)
    ancestor(X,Y) <- parent(X,Z), ancestor(Z,Y)
    """
session = Session()
output = session.run_commands(commands)
session.print_all_rules()

Tree(Token('RULE', 'relation_declaration'), [parent(str, str)])
Tree(Token('RULE', 'relation_declaration'), [grandparent(str, str)])
Tree(Token('RULE', 'add_fact'), [parent("Liam", "Noah")])
Tree(Token('RULE', 'add_fact'), [parent("Noah", "Oliver")])
Tree(Token('RULE', 'add_fact'), [parent("James", "Lucas")])
Tree(Token('RULE', 'add_fact'), [parent("Noah", "Benjamin")])
Tree(Token('RULE', 'add_fact'), [parent("Benjamin", "Mason")])
Tree(Token('RULE', 'add_fact'), [grandparent("Tom", "Avi")])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- grandparent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)])
Printing all the rules:
	1. ancestor(X, Y) <- parent(X, Y)
	2. ancestor(X, Y) <- grandparent(X, Y)
	3. ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)


After removing the first rule:

In [None]:
session.remove_rule("ancestor(X, Y) <- parent(X, Y)")
session.print_all_rules()

Printing all the rules:
	1. ancestor(X, Y) <- grandparent(X, Y)
	2. ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)


:::

In [None]:
#| export
#| hide
@patch_method
def remove_all_rules(self: Session, rule_head: Optional[str] = None # if rule head is not none we remove all rules with rule_head
                        ) -> None:
    """
    Removes rules from the engine: every rule, or only those with a given head.

    With `rule_head` set, only the rules with that head are removed from the term
    graph, and that head relation is removed from the symbol table and the engine.
    With `rule_head` left as None, the term graph is replaced by a fresh one and
    every rule relation is removed from the symbol table and the engine.
    """
    if rule_head is not None:
        # Targeted removal: only the rules defining `rule_head`.
        self._term_graph.remove_rules_with_head(rule_head)
        self._remove_rule_relation_from_symbols_and_engine(rule_head)
        return

    # Full reset: start from an empty term graph, then drop the tables of the
    # rule relations that the symbol table reports as removed.
    self._term_graph = TermGraph()
    removed_relation_names = self._symbol_table.remove_all_rule_relations()
    self._engine.remove_tables(removed_relation_names)

In [None]:
show_doc(Session.remove_all_rules)

---

### Session.remove_all_rules

>      Session.remove_all_rules (rule_head:Union[str,NoneType]=None)

Removes all rules from the engine.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| rule_head | Optional[str] | None | if rule head is not none we remove all rules with rule_head |
| **Returns** | **None** |  |  |

::: {.callout-note collapse="true"}

##### Example

In [None]:

commands = """
    new parent(str, str)
    new grandparent(str, str)
    parent("Liam", "Noah")
    grandparent("Tom", "Avi")
    ancestor(X,Y) <- parent(X,Y)
    ancestor(X,Y) <- grandparent(X,Y)
    ancestor(X,Y) <- parent(X,Z), ancestor(Z,Y)
    """
session = Session()
output = session.run_commands(commands)
session.print_all_rules()

Tree(Token('RULE', 'relation_declaration'), [parent(str, str)])
Tree(Token('RULE', 'relation_declaration'), [grandparent(str, str)])
Tree(Token('RULE', 'add_fact'), [parent("Liam", "Noah")])
Tree(Token('RULE', 'add_fact'), [grandparent("Tom", "Avi")])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- grandparent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)])
Printing all the rules:
	1. ancestor(X, Y) <- parent(X, Y)
	2. ancestor(X, Y) <- grandparent(X, Y)
	3. ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)


After removing all rules:

In [None]:
session.remove_all_rules()
session.print_all_rules()

Printing all the rules:


:::

In [None]:
#| export
#| hide
@patch_method
def clear_relation(self: Session, relation_name: str # The name of the relation to clear
                    ) -> None:
    """
    Clear the relation named `relation_name` through the session's engine.

    Raises an `Exception` if the engine has no table with that name.
    """
    engine = self._engine
    if engine.is_table_exists(relation_name):
        engine.clear_relation(relation_name)
        return

    raise Exception(f"Relation {relation_name} does not exist")

In [None]:
show_doc(Session.clear_relation)

---

### Session.clear_relation

>      Session.clear_relation (relation_name:str)

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| relation_name | str | The name of the relation to clear |
| **Returns** | **None** |  |

::: {.callout-note collapse="true"}

##### Example

In [None]:
commands = """
    new parent(str, str)
    parent("Liam", "Noah")
    parent("Noah", "Oliver")
    ?parent(X,Y)
    """
session = Session()
output = session.run_commands(commands)

Tree(Token('RULE', 'relation_declaration'), [parent(str, str)])
Tree(Token('RULE', 'add_fact'), [parent("Liam", "Noah")])
Tree(Token('RULE', 'add_fact'), [parent("Noah", "Oliver")])
Tree(Token('RULE', 'query'), [parent(X, Y)])
printing results for query 'parent(X, Y)':
  X   |   Y
------+--------
 Liam |  Noah
 Noah | Oliver



After clearing the `parent` relation:

In [None]:
session.clear_relation("parent")
commands = """
    ?parent(X,Y)
    """
output = session.run_commands(commands)

Tree(Token('RULE', 'query'), [parent(X, Y)])
printing results for query 'parent(X, Y)':
[]



:::

In [None]:
#| export
#| hide
@patch_method
def send_commands_result_into_csv(self: Session, commands: str, # the commands to run
                                    csv_file_name: Path, # the file into which the output will be written
                                    delimiter: str = CSV_DELIMITER # a csv separator between values
                                    ) -> None:
    """
    Run `commands` without printing and write their single formatted result into a csv file.

    The commands must produce exactly one output (i.e. contain one query);
    otherwise an `Exception` is raised and nothing is written.
    """
    results = self.run_commands(commands, print_results=False)
    if len(results) != 1:
        raise Exception("the commands must have exactly one output")

    only_result = results[0]
    formatted = format_query_results(*only_result)

    if not isinstance(formatted, DataFrame):
        # Non-tabular result (the true/false case): write its rows with the csv module.
        with open(csv_file_name, "w", newline="") as csv_file:
            csv.writer(csv_file, delimiter=delimiter).writerows(formatted)
        return

    formatted.to_csv(csv_file_name, index=False, sep=delimiter)

In [None]:
show_doc(Session.send_commands_result_into_csv)

---

### Session.send_commands_result_into_csv

>      Session.send_commands_result_into_csv (commands:str,
>                                             csv_file_name:pathlib.Path,
>                                             delimiter:str=';')

run commands as usual and output their formatted results into a csv file (the commands should contain a query)

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| commands | str |  | the commands to run |
| csv_file_name | Path |  | the file into which the output will be written |
| delimiter | str | ; | a csv separator between values |
| **Returns** | **None** |  |  |

In [None]:
#| export
#| hide
@patch_method
def print_registered_ie_functions(self: Session) -> None:
    """
    Print information about the registered ie functions.

    Delegates to the session's symbol table.
    """
    symbols = self._symbol_table
    symbols.print_registered_ie_functions()

In [None]:
show_doc(Session.print_registered_ie_functions)

---

### Session.print_registered_ie_functions

>      Session.print_registered_ie_functions ()

Prints information about the registered ie functions.

In [None]:
#| export
#| hide
@patch_method
def remove_ie_function(self: Session, name: str # the name of the ie function to remove
                        ) -> None:
    """
    Remove the ie function called `name` from the session's symbol table.
    """
    symbols = self._symbol_table
    symbols.remove_ie_function(name)

In [None]:
show_doc(Session.remove_ie_function)

---

### Session.remove_ie_function

>      Session.remove_ie_function (name:str)

Removes a function from the symbol table.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| name | str | the name of the ie function to remove |
| **Returns** | **None** |  |

In [None]:
#| export
#| hide
@patch_method
def remove_all_ie_functions(self: Session) -> None:
    """
    Remove every ie function from the session's symbol table.
    """
    symbols = self._symbol_table
    symbols.remove_all_ie_functions()

In [None]:
show_doc(Session.remove_all_ie_functions)

---

### Session.remove_all_ie_functions

>      Session.remove_all_ie_functions ()

Removes all the ie functions from the symbol table.

In [None]:
#| export
#| hide
@patch_method
def print_all_rules(self: Session, head: Optional[str] = None # if specified it will print only rules with the given head relation name
                    ) -> None:
    """
    Print the registered rules, optionally restricted to one head relation.

    Delegates to the session's term graph, passing `head` through unchanged.
    """
    term_graph = self._term_graph
    term_graph.print_all_rules(head)

In [None]:
show_doc(Session.print_all_rules)

---

### Session.print_all_rules

>      Session.print_all_rules (head:Union[str,NoneType]=None)

Prints all the rules that are registered.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| head | Optional[str] | None | if specified it will print only rules with the given head relation name |
| **Returns** | **None** |  |  |

::: {.callout-note collapse="true"}

##### Example

In [None]:
commands = """
    new parent(str, str)
    new grandparent(str, str)
    ancestor(X,Y) <- parent(X,Y)
    ancestor(X,Y) <- grandparent(X,Y)
    ancestor(X,Y) <- parent(X,Z), ancestor(Z,Y)
    """
session = Session()
output = session.run_commands(commands)
session.print_all_rules()

Tree(Token('RULE', 'relation_declaration'), [parent(str, str)])
Tree(Token('RULE', 'relation_declaration'), [grandparent(str, str)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- grandparent(X, Y)])
Tree(Token('RULE', 'rule'), [ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)])
Printing all the rules:
	1. ancestor(X, Y) <- parent(X, Y)
	2. ancestor(X, Y) <- grandparent(X, Y)
	3. ancestor(X, Y) <- parent(X, Z), ancestor(Z, Y)


:::

In [None]:
#| export
#| hide
@patch_method
def import_rel(self: Session, data: Union[DataFrame, Path, str], #Either a dataframe or a path to a csv file to import.
                             relation_name: Optional[str] = None, #The name of the relation. If not provided when importing a csv, it will be derived from the file name.
                             delimiter: Optional[str] = None #The delimiter used when parsing a csv file, defaults to ';'
                             )-> None:
    """Imports a relation into the current session, either from a dataframe or from a csv file.

    The relation's column types are inferred from the first row of the data.

    Raises:
        Exception: if a dataframe is empty, cannot be converted to a list, or is
            given without a `relation_name`.
        IOError: if the csv path is not an existing file or the file is empty.
        TypeError: if `data` is neither a dataframe nor a path (`Path` or `str`).
    """
    if isinstance(data, DataFrame):
        data_list = data.values.tolist()

        if not isinstance(data_list, list):
            raise Exception("dataframe could not be converted to list")
        if len(data_list) < 1:
            raise Exception("dataframe is empty")
        if relation_name is None:
            raise Exception("relation_name must be provided when importing a dataframe")

        relation_types = _infer_relation_type(data_list[0])
        self._add_imported_relation_to_engine(data_list, relation_name, relation_types)
        return

    if not isinstance(data, (Path, str)):
        # Previously an unsupported type fell through and returned silently,
        # leaving the caller with no relation and no error.
        raise TypeError(
            f"data must be a DataFrame or a path to a csv file, got {type(data).__name__}")

    csv_file_name = Path(data)
    if not csv_file_name.is_file():
        raise IOError("csv file does not exist")
    if os.stat(csv_file_name).st_size == 0:
        raise IOError("csv file is empty")

    # the relation_name is either an argument or the file's name
    if relation_name is None:
        relation_name = csv_file_name.stem

    if delimiter is None:
        delimiter = CSV_DELIMITER

    # newline="" lets the csv module do its own newline handling, which is needed
    # for quoted fields that contain line breaks.
    with open(csv_file_name, newline="") as fh:
        reader = csv.reader(fh, delimiter=delimiter)

        # read first line and go back to start of file - make sure there is no empty line!
        relation_types = _infer_relation_type(next(reader))
        fh.seek(0)

        # The reader is consumed lazily, so it must be handed over while the file is still open.
        self._add_imported_relation_to_engine(reader, relation_name, relation_types)



In [None]:
show_doc(Session.import_rel)

---

### Session.import_rel

>      Session.import_rel (data:Union[pandas.core.frame.DataFrame,pathlib.Path],
>                          relation_name:str=None, delimiter:str=None)

Imports a relation into the current session, either from a dataframe or from a csv file.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| data | Union[DataFrame, Path] |  | Either a dataframe or a path to a csv file to import. |
| relation_name | str | None | The name of the relation. If not provided when importing a csv, it will be derived from the file name. |
| delimiter | str | None | The delimiter used when parsing a csv file, defaults to ';' |
| **Returns** | **None** |  |  |

In [None]:
# Import the `enrolled` relation from a comma-delimited csv file, add one more fact
# to it, and join it with the matches of a regex ie function.
session = Session()
session.import_rel("./sample_data/enrolled.csv", relation_name="enrolled", delimiter=",")
# Raw string literal: the regex escapes (\w, \d) are not valid Python escapes, so a
# plain literal makes Python emit an invalid-escape-sequence warning. The resulting
# string value is the same either way.
commands = r"""
    enrolled("abigail", "chemistry")
gpa_str = "abigail 100 jordan 80 gale 79 howard 60"

gpa(Student,Grade) <- py_rgx_string(gpa_str, "(\w+).*?(\d+)")->(Student, Grade),enrolled(Student,X)

?gpa(X,Y)
"""
x = session.run_commands(commands)

Tree(Token('RULE', 'add_fact'), [enrolled("abigail", "chemistry")])
Tree(Token('RULE', 'assignment'), [gpa_str = "abigail 100 jordan 80 gale 79 howard 60"])
Tree(Token('RULE', 'rule'), [gpa(Student, Grade) <- py_rgx_string("abigail 100 jordan 80 gale 79 howard 60", "(\w+).*?(\d+)") -> (Student, Grade), enrolled(Student, X)])
Tree(Token('RULE', 'query'), [gpa(X, Y)])
printing results for query 'gpa(X, Y)':
    X    |   Y
---------+-----
 abigail | 100
 jordan  |  80
  gale   |  79
 howard  |  60



In [None]:
session = Session()
lecturer_df = DataFrame(([["walter","chemistry"], ["linus", "operating_systems"]]))
session.import_rel(lecturer_df, relation_name="lecturer")
commands = """ 
?lecturer(X,Y)
"""
output = session.run_commands(commands)

Tree(Token('RULE', 'query'), [lecturer(X, Y)])
printing results for query 'lecturer(X, Y)':
   X    |         Y
--------+-------------------
 walter |     chemistry
 linus  | operating_systems



In [None]:
output

[(lecturer(X, Y), [('walter', 'chemistry'), ('linus', 'operating_systems')])]

In [None]:
#with checkLogs():
#TODO from here make logging work with dict logging like in llmcourse,
# then make an example with an ie function so we see how the engines execute it

# Then make a tests example with aggregation functions

### Tests

In [None]:
commands = """
        new A(str, str)
        new B(str, str)
        C(X, Y) <- A(X, Y)
        D(X, Y, X) <- C(X, Y)
        D(X, Y, Z) <- A(X, 1), B(X, Y), ID(X) -> (Y), ID2(Y)->(Z,W)  
    """

def ID(string: str):
    """Yield a single value: `string` with the suffix `_id` appended."""
    tagged = f'{string}_id'
    yield tagged

def ID2(string: str):
    """Yield a single 2-tuple: `string` suffixed with `_id2_z` and with `_id2_w`."""
    z_value = f'{string}_id2_z'
    w_value = f'{string}_id2_w'
    yield (z_value, w_value)

session = Session()
session.register(ID,'ID', [DataTypes.string], [DataTypes.string])
session.register(ID2,'ID2', [DataTypes.string], [DataTypes.string,DataTypes.string])

_ = session.run_commands(commands)
# p_graph,t_graph = session.run_commands('?GP(G, "Austin" , L)',ret_graphs=True)


AttributeError: 'Session' object has no attribute 'register'

In [None]:
commands = """
    new Parent(str, str)
    Parent("Sam", "Noah")
    Parent("Noah", "Austin")
    Parent("Austin", "Stephen")

    GrandParent(G, C) <- Parent(G, M), Parent(M, C)

    GP(G,C,L)<- GrandParent(G,C),Len(G)->(L)
    GP(G,C,L)<- Parent(G,C),Len(G)->(L)
    """

def length(string: str) -> Iterable[int]:
    """Yield a single value: the number of characters in `string`."""
    char_count = len(string)
    yield char_count

session = Session()
session.register(length,'Len', [DataTypes.string], [DataTypes.integer])

_ = session.run_commands(commands)
# p_graph,t_graph = session.run_commands('?GP(G, "Austin" , L)',ret_graphs=True)


AttributeError: 'Session' object has no attribute 'register'

In [None]:
session = Session()
session.register(length,'Len', [DataTypes.string], [DataTypes.integer])

parse_tree = parse_spannerlog("""
    new Parent(str, str)
    Parent("Sam", "Noah")
    Parent("Noah", "Austin")
    Parent("Austin", "Stephen")

    GrandParent(G, C) <- Parent(G, M), Parent(M, C)

    GP(G,C,L)<- GrandParent(G,C),Len(G)->(L)
    GP(G,C,L)<- Parent(G,C),Len(G)->(L)
    """)
print(parse_tree.pretty())

start
  relation_declaration
    relation_name	Parent
    decl_term_list
      decl_string
      decl_string
  add_fact
    relation_name	Parent
    const_term_list
      string	"Sam"
      string	"Noah"
  add_fact
    relation_name	Parent
    const_term_list
      string	"Noah"
      string	"Austin"
  add_fact
    relation_name	Parent
    const_term_list
      string	"Austin"
      string	"Stephen"
  rule
    rule_head
      relation_name	GrandParent
      free_var_name_list
        free_var_name	G
        free_var_name	C
    rule_body_relation_list
      relation
        relation_name	Parent
        term_list
          free_var_name	G
          free_var_name	M
      relation
        relation_name	Parent
        term_list
          free_var_name	M
          free_var_name	C
  rule
    rule_head
      relation_name	GP
      free_var_name_list
        free_var_name	G
        free_var_name	C
        free_var_name	L
    rule_body_relation_list
      relation
        relation_name	GrandPare

In [None]:
new_statements = [
    session._run_passes(s, session.get_pass_stack()) for s in parse_tree.children
]
new_statements

[Tree(Token('RULE', 'relation_declaration'), [Parent(str, str)]),
 Tree(Token('RULE', 'add_fact'), [Parent("Sam", "Noah")]),
 Tree(Token('RULE', 'add_fact'), [Parent("Noah", "Austin")]),
 Tree(Token('RULE', 'add_fact'), [Parent("Austin", "Stephen")]),
 Tree(Token('RULE', 'rule'), [GrandParent(G, C) <- Parent(G, M), Parent(M, C)]),
 Tree(Token('RULE', 'rule'), [GP(G, C, L) <- GrandParent(G, C), Len(G) -> (L)]),
 Tree(Token('RULE', 'rule'), [GP(G, C, L) <- Parent(G, C), Len(G) -> (L)])]

In [None]:
new_statements[0].data.type

'RULE'

In [None]:
new_statements[0].children[0]

Parent(str, str)

In [None]:
# TODO from here take out the type of action and the type itself and put it in the execution loop in run commands

In [None]:
print(session._parse_graph.pretty())

(__spannerlog_root) (computed) root
    (0) (computed) relation_declaration: Parent(str, str)
    (1) (computed) add_fact: Parent("Sam", "Noah")
    (2) (computed) add_fact: Parent("Noah", "Austin")
    (3) (computed) add_fact: Parent("Austin", "Stephen")
    (4) (computed) rule: GrandParent(G, C) <- Parent(G, M), Parent(M, C)
    (5) (computed) rule: GP(G, C, L) <- GrandParent(G, C), Len(G) -> (L)
    (6) (computed) rule: GP(G, C, L) <- Parent(G, C), Len(G) -> (L)



In [None]:
session._engine.declare_relation_table()

In [None]:
spannerlog_engine.declare_relation_table(rule.head_relation.as_relation_declaration())

In [None]:
# session.export('?GP(G, C , L)')

In [None]:
print(session._parse_graph.pretty())

(__spannerlog_root) (computed) root
    (0) (computed) relation_declaration: Parent(str, str)
    (1) (computed) add_fact: Parent("Sam", "Noah")
    (2) (computed) add_fact: Parent("Noah", "Austin")
    (3) (computed) add_fact: Parent("Austin", "Stephen")
    (4) (computed) rule: GrandParent(G, C) <- Parent(G, M), Parent(M, C)
    (5) (computed) rule: GP(G, C, L) <- GrandParent(G, C), Len(G) -> (L)
    (6) (computed) rule: GP(G, C, L) <- Parent(G, C), Len(G) -> (L)



In [None]:
list(session._parse_graph._graph.nodes(data=True))

[('__spannerlog_root',
  {'type': 'root', 'state': <EvalState.COMPUTED: 'computed'>}),
 (0,
  {'type': <ParseNodeType.RELATION_DECLARATION: 'relation_declaration'>,
   'value': Parent(str, str),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (1,
  {'type': <ParseNodeType.ADD_FACT: 'add_fact'>,
   'value': Parent("Sam", "Noah"),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (2,
  {'type': <ParseNodeType.ADD_FACT: 'add_fact'>,
   'value': Parent("Noah", "Austin"),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (3,
  {'type': <ParseNodeType.ADD_FACT: 'add_fact'>,
   'value': Parent("Austin", "Stephen"),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (4,
  {'type': <ParseNodeType.RULE: 'rule'>,
   'value': GrandParent(G, C) <- Parent(G, M), Parent(M, C),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (5,
  {'type': <ParseNodeType.RULE: 'rule'>,
   'value': GP(G, C, L) <- GrandParent(G, C), Len(G) -> (L),
   'state': <EvalState.COMPUTED: 'computed'>}),
 (6,
  {'type': <ParseNodeT

In [None]:
list(session._parse_graph._graph.edges(data=True))

[('__spannerlog_root', 0, {}),
 ('__spannerlog_root', 1, {}),
 ('__spannerlog_root', 2, {}),
 ('__spannerlog_root', 3, {}),
 ('__spannerlog_root', 4, {}),
 ('__spannerlog_root', 5, {}),
 ('__spannerlog_root', 6, {})]

In [None]:
print(t_graph.pretty())

(__spannerlog_root) (not_computed) root
    (GrandParent) (not_computed) rule_rel: GrandParent(G, C)
        (0) (not_computed) union
            (1) (not_computed) project: ['G', 'C']
                (2) (not_computed) join: {'C': [(Parent(M, C), 1)], 'M': [(Parent(M, C), 0), (Parent(G, M), 1)], 'G': [(Parent(G, M), 0)]}
                    (3) (not_computed) get_rel: Parent(M, C)
                    (4) (not_computed) get_rel: Parent(G, M)
    (GP) (not_computed) rule_rel: GP(G, C, L)
        (5) (not_computed) union
            (6) (not_computed) project: ['G', 'C', 'L']
                (7) (not_computed) join: {'C': [(GrandParent(G, C), 1)], 'L': [(Len(G) -> (L), 1)], 'G': [(Len(G) -> (L), 0), (GrandParent(G, C), 0)]}
                    (8) (not_computed) get_rel: GrandParent(G, C)
                        (GrandParent) (not_computed) rule_rel: GrandParent(G, C)
                    (9) (not_computed) calc: Len(G) -> (L)
                        (8) (not_computed) get_rel: GrandParen

In [None]:
with checkLogs():
    session.export('?GP(G, C , L)')

spannerlib.adding_inference_rules_to_term_graph - DEBUG - term graph after AddRulesToTermGraph:
(__spannerlog_root) (not_computed) root
    (GrandParent) (not_computed) rule_rel: GrandParent(G, C)
        (0) (not_computed) union
            (1) (not_computed) project: ['G', 'C']
                (2) (not_computed) join: {'C': [(Parent(M, C), 1)], 'M': [(Parent(M, C), 0), (Parent(G, M), 1)], 'G': [(Parent(G, M), 0)]}
                    (3) (not_computed) get_rel: Parent(M, C)
                    (4) (not_computed) get_rel: Parent(G, M)
    (GP) (not_computed) rule_rel: GP(G, C, L)
        (5) (not_computed) union
            (6) (not_computed) project: ['G', 'C', 'L']
                (7) (not_computed) join: {'C': [(GrandParent(G, C), 1)], 'L': [(Len(G) -> (L), 1)], 'G': [(Len(G) -> (L), 0), (GrandParent(G, C), 0)]}
                    (8) (not_computed) get_rel: GrandParent(G, C)
                        (GrandParent) (not_computed) rule_rel: GrandParent(G, C)
                    (9) (

### Old exports

In [None]:
# #| export
# #| hide
# @patch_method
# def import_relation_from_csv(self: Session, csv_file_name: Path, #The path to the CSV file that is being imported
#                              relation_name: str = None, #The name of the relation. If not provided, it will be derived from the CSV file name
#                              delimiter: str = CSV_DELIMITER #The delimiter used in the CSV file
#                              )-> None: 
#     if not Path(csv_file_name).is_file():
#         raise IOError("csv file does not exist")

#     if os.stat(csv_file_name).st_size == 0:
#         raise IOError("csv file is empty")

#     # the relation_name is either an argument or the file's name
#     if relation_name is None:
#         relation_name = Path(csv_file_name).stem

#     with open(csv_file_name) as fh:
#         reader = csv.reader(fh, delimiter=delimiter)

#         # read first line and go back to start of file - make sure there is no empty line!
#         relation_types = _infer_relation_type(next(reader))
#         fh.seek(0)

#         self._add_imported_relation_to_engine(reader, relation_name, relation_types)

In [None]:
# show_doc(Session.import_relation_from_csv)

::: {.callout-note collapse="true"}

##### Example

In [None]:
# session = Session()
# session.import_relation_from_csv("./sample_data/enrolled.csv", relation_name="enrolled", delimiter=",")
# commands = """
#     enrolled("abigail", "chemistry")
# gpa_str = "abigail 100 jordan 80 gale 79 howard 60"

# gpa(Student,Grade) <- py_rgx_string(gpa_str, "(\w+).*?(\d+)")->(Student, Grade),enrolled(Student,X)

# ?gpa(X,Y)
# """
# x = session.run_commands(commands)

:::

In [None]:
# #| export
# #| hide
# @patch_method
# def import_relation_from_df(self: Session, relation_df: DataFrame, #The DataFrame containing the data to be imported
#                             relation_name: str #The name to be assigned to the relation. It can be an existing relation or a new one
#                             ) -> None:
#     data = relation_df.values.tolist()

#     if not isinstance(data, list):
#         raise Exception("dataframe could not be converted to list")

#     if len(data) < 1:
#         raise Exception("dataframe is empty")

#     relation_types = _infer_relation_type(data[0])

#     self._add_imported_relation_to_engine(data, relation_name, relation_types)

In [None]:
# show_doc(Session.import_relation_from_df)

::: {.callout-note collapse="true"}

##### Example

In [None]:
# session = Session()
# lecturer_df = DataFrame(([["walter","chemistry"], ["linus", "operating_systems"]]))
# session.import_relation_from_df(lecturer_df, relation_name="lecturer")
# commands = """ 
# ?lecturer(X,Y)
# """
# output = session.run_commands(commands)

:::

In [None]:
#| hide
# Smoke test for clear_relation: fill a relation, clear it, check that it is empty
# but still queryable, then check that new facts can be added to it again.
if __name__ == "__main__":
    my_session = Session()
    # Declare `parent`, add three facts and query them.
    my_session.run_commands("""
                        new parent(str,str)
                        parent("Jack", "Alex")
                        parent("Michael", "Jackson")
                        parent("Van", "Diesel")
                        ?parent(X,Y)
            """)
    my_session.clear_relation("parent")
    # After clearing, the query must still run and return no rows.
    output = my_session.run_commands("""
                # Expect to see empty relation
                ?parent(X,Y)
            """)
    assert str(output) == "[(parent(X, Y), [])]"
    # The cleared relation must accept new facts without being re-declared.
    output = my_session.run_commands("""
                        # Check smooth refilling
                        parent("John", "Cena")
                        ?parent(X,Y)
                    """)
    assert str(output) == "[(parent(X, Y), [('John', 'Cena')])]"
    

printing results for query 'parent(X, Y)':
    X    |    Y
---------+---------
  Jack   |  Alex
 Michael | Jackson
   Van   | Diesel

printing results for query 'parent(X, Y)':
[]

printing results for query 'parent(X, Y)':
  X   |  Y
------+------
 John | Cena



In [None]:
#| hide
