# Primitive datatypes
> Data types used by the engine to encode rules, relations, and related constructs.

In [None]:
#| default_exp data_types

In [None]:
#| hide
from nbdev.showdoc import show_doc
from IPython.display import display, HTML
%load_ext autoreload
%autoreload 2

In [None]:
#| export
from abc import ABC, abstractmethod
import pytest
from collections import defaultdict

import pandas as pd
from pathlib import Path
from typing import no_type_check, Set, Sequence, Any,Optional,List,Callable,Dict,Union
from pydantic import BaseModel
import networkx as nx
import itertools
from graph_rewrite import draw, draw_match, rewrite, rewrite_iter
from spannerlib.utils import serialize_graph,serialize_df_values,checkLogs,get_new_node_name
from spannerlib.span import Span,SpanParser

import logging
logger = logging.getLogger(__name__)

In [None]:
#| export
from enum import Enum
from typing import Any
from pydantic import ConfigDict


class Var(BaseModel):
    """Named variable; its hash is derived from `name` alone."""
    # Identifier of the variable; `pretty` renders a Var as this name.
    name: str
    def __hash__(self):
        # Hash only on the name so instances are usable in sets / as dict keys.
        return hash(self.name)

class FreeVar(BaseModel):
    """Named free variable used as a term in relations; its hash is derived from `name` alone.

    Being hashable lets a FreeVar serve as a key of `Relation.agg`.
    """
    # Identifier of the free variable; `pretty` renders a FreeVar as this name.
    name: str
    def __hash__(self):
        # Hash only on the name so instances are usable in sets / as dict keys.
        return hash(self.name)




class RelationDefinition(BaseModel):
    """Declaration of a relation: its name together with its column scheme."""
    # Allow field values whose types pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Relation name, e.g. 'R'.
    name: str
    # One entry per column; in this notebook these are types such as int, str or Span.
    scheme: List

class Relation(BaseModel):
    """A relation atom: a name applied to a list of terms, optionally with aggregations."""
    # Allow field values whose types pydantic cannot validate natively (e.g. Span terms).
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Relation name, e.g. 'R'.
    name: str
    # Positional terms; in this notebook these are FreeVars, Spans and plain constants.
    terms: List
    # Optional mapping from a free variable to the name of the aggregation applied to it
    # (e.g. {FreeVar(name='Y'): 'sum'}, which `pretty` renders as `sum(Y)`).
    agg: Optional[Dict[FreeVar,str]] = None

class IEFunction(BaseModel):
    """Registration record of an IE function: its name, implementation and schemas."""
    # Allow field values whose types pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Name under which the function is referred to.
    name: str
    # The Python callable implementing the function.
    func: Callable
    # Each schema is either a fixed list, or a callable that takes the expected
    # arity and gives us the schema.
    in_schema: Union[List,Callable] 
    out_schema: Union[List,Callable]


class AGGFunction(BaseModel):
    """Registration record of an aggregation function: its name, implementation and schemas."""
    # Allow field values whose types pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # Name under which the aggregation is referred to.
    name: str
    # Either a Python callable or a string naming the aggregation
    # (how the string is resolved is not visible in this file).
    func: Union[Callable,str]
    # Fixed input and output schemas, given as lists.
    in_schema: List 
    out_schema: List

class IERelation(BaseModel):
    """An IE relation atom: a name with separate input and output term lists."""
    # Allow field values whose types pydantic cannot validate natively (e.g. Span terms).
    model_config = ConfigDict(arbitrary_types_allowed=True)
    name: str
    in_terms: List
    out_terms: List

    def __hash__(self):
        # Fold the name and the string form of every term into a single key,
        # then hash that key.
        joined_in = '_'.join(map(str, self.in_terms))
        joined_out = '_'.join(map(str, self.out_terms))
        return hash(f'{self.name}_in_{joined_in}_out_{joined_out}')

class Rule(BaseModel):
    """A rule: a head relation derived from a body of relations and IE relations."""
    # Allow field values whose types pydantic cannot validate natively.
    model_config = ConfigDict(arbitrary_types_allowed=True)
    # The relation produced by the rule (left-hand side of `<-`).
    head: Relation
    # The body atoms, in order (right-hand side of `<-`).
    body: List[Union[Relation,IERelation]]

In [None]:
#| export
def _pretty_schema(schema):
    """Render an IE function schema as comma-joined terms, or `...` when it is a callable.

    A callable schema is resolved from the expected arity at call time, so there
    is no fixed list of types to print.
    """
    if callable(schema):
        return "..."
    return ','.join(pretty(o) for o in schema)


def pretty(obj):
    """pretty printing dataclasses for user messages,
    making them look like spannerlog code instead of python code

    Supported objects and their rendering:

    * `Span` -> `[start,end)`
    * `Var` / `FreeVar` -> the variable name
    * `RelationDefinition` -> `name(type,...)`
    * `Relation` -> `name(term,...)`; aggregated terms are wrapped as `agg(term)`
    * `IERelation` -> `name(in,...) -> (out,...)`
    * `IEFunction` -> `name(in,...) -> (out,...)`; a callable (arity-dependent)
      schema is shown as `...`
    * `Rule` -> `head <- body1,body2,...`
    * a type -> its `__name__`
    * anything else -> `str(obj)`
    """

    if isinstance(obj,Span):
        return f"[{obj.start},{obj.end})"
    elif isinstance(obj,(Var,FreeVar)):
        return obj.name
    elif isinstance(obj,RelationDefinition):
        return f"{obj.name}({','.join(pretty(o) for o in obj.scheme)})"
    elif isinstance(obj,Relation):
        if obj.agg:
            # wrap every aggregated term with the name of its aggregation function
            pretty_terms = [f"{obj.agg[term]}({pretty(term)})" if (term in obj.agg) else pretty(term) for term in obj.terms]
        else:
            pretty_terms = [pretty(t) for t in obj.terms]
        return f"{obj.name}({','.join(pretty_terms)})"
    elif isinstance(obj,IERelation):
        return f"{obj.name}({','.join(pretty(o) for o in obj.in_terms)}) -> ({','.join(pretty(o) for o in obj.out_terms)})"
    elif isinstance(obj,IEFunction):
        # in_schema/out_schema may be callables rather than lists; iterating
        # them directly would raise TypeError, so go through the helper.
        return f"{obj.name}({_pretty_schema(obj.in_schema)}) -> ({_pretty_schema(obj.out_schema)})"
    elif isinstance(obj,Rule):
        return f"{pretty(obj.head)} <- {','.join(pretty(o) for o in obj.body)}"
    elif isinstance(obj,type):
        return obj.__name__
    else:
        return str(obj)

In [None]:
# A rule whose body mixes a regular relation (with a Span constant) and an
# IE relation; `pretty` should render it as spannerlog source text.
rule = Rule(
    head=Relation(name='R', terms=[FreeVar(name='X'), FreeVar(name='Y'), FreeVar(name='Z')]),
    body=[
        Relation(name='S', terms=[FreeVar(name='X'), Span(start=1,end=4)]),
        IERelation(name='T', in_terms=[FreeVar(name='X'), 1], out_terms=[FreeVar(name='Y'), FreeVar(name='Z')])
    ])
assert pretty(rule) == 'R(X,Y,Z) <- S(X,[1,4)),T(X,1) -> (Y,Z)'

In [None]:
# Types inside a relation definition are printed by their class name.
schema = RelationDefinition(name='R', scheme=[int, str, Span])
assert pretty(schema) == 'R(int,str,Span)'
# An IE function with fixed (list) schemas prints as `name(in...) -> (out...)`.
ie_func_schema = IEFunction(name='f', in_schema=[int, str], out_schema=[str, Span],func=lambda x,y: (y,Span(1,2)))
assert pretty(ie_func_schema) == 'f(int,str) -> (str,Span)'

In [None]:
# A term listed in `agg` is wrapped with its aggregation name; the other terms print as-is.
agg_head = Relation(name='R', terms=[FreeVar(name='X'), FreeVar(name='Y'), FreeVar(name='Z')],agg={FreeVar(name='Y'):'sum'})
assert pretty(agg_head) == 'R(X,sum(Y),Z)'

In [None]:
#| export
import re
# Matches a non-empty run of characters containing no CR/LF.
# Note that `$` also matches just before one trailing "\n".
STRING_PATTERN = re.compile(r"^[^\r\n]+$")


def _infer_relation_schema(row) -> Sequence[type]: # Inferred type list of the given relation
    """
    Guess the relation type based on the data.
    We support both the actual types (e.g. 'Span'), and their string representation ( e.g. `"[0,8)"`).

    **@raise** ValueError: if there is a cell inside `row` of an illegal type.
    """
    relation_types = []
    for cell in row:
        try:
            int(cell)  # check if the cell can be converted to integer
            relation_types.append(int)
        except (ValueError, TypeError):
            if isinstance(cell, Span) or SpanParser.parse(cell):
                relation_types.append(Span)
            elif re.match(STRING_PATTERN, cell):
                relation_types.append(str)
            else:
                raise ValueError(f"value doesn't match any datatype: {cell}")

    return relation_types

In [None]:
# Integer cells are inferred as int.
assert _infer_relation_schema([1, 2, 3]) == [ int,int,int]
# A plain single-line string falls back to str.
assert _infer_relation_schema([1, 'a']) == [ int,str]
# Span strings (with or without a space after the comma) and Span objects are all inferred as Span.
assert _infer_relation_schema(['[0,1)','[0, 1)',Span(1,3)]) == [Span,Span,Span]

In [None]:
#|hide
# Run nbdev's export step from inside the notebook.
import nbdev; nbdev.nbdev_export()
     