In [1]:
from enum import Enum
from dataclasses import dataclass

class TokenType(Enum):
    """Kinds of token that can appear in a predicate expression."""

    # Members are numbered 0..4 in declaration order.
    CLAUSE, LPAREN, RPAREN, AND, OR = range(5)


@dataclass
class Token:
    """A single lexical token: its kind plus an optional payload."""

    # Kind of token.
    type: TokenType
    # Optional payload; None means the token carries no value.
    # Annotated as `object` because the builtin function `any` is not a type.
    value: object = None

    def __repr__(self):
        """Return the type name, followed by ``(value)`` when a value is set."""
        # Identity check: a payload whose __eq__ is unusual must not be
        # mistaken for "no value".
        if self.value is None:
            return self.type.name
        return f"{self.type.name}({self.value})"

In [2]:
import re
# Characters the lexer skips over between tokens (space, newline, tab).
WHITESPACE = ' \n\t'


class Lexer:
    """Split a predicate string into LPAREN / RPAREN / AND / OR / CLAUSE tokens.

    Parentheses are single-character tokens. Any other run of characters up to
    the next parenthesis is cut at the keywords ``and`` / ``or``; the pieces in
    between become CLAUSE tokens carrying the stripped clause text.
    """

    # ``and`` / ``or`` count as keywords only when they stand alone as words.
    # Without the word boundaries, clauses such as "brand = x" or "color = y"
    # would be cut in the middle of "brand" and "color".
    _KEYWORD_PATTERN = r"\b(and|or)\b"

    def __init__(self, text):
        """Start lexing ``text`` and position the lexer on its first character."""
        self.text = iter(text)
        self.advance()

    def advance(self):
        """Move to the next character; ``current_char`` is None at end of input."""
        self.current_char = next(self.text, None)

    def generate_tokens(self):
        """Yield the tokens of the input one by one (generator)."""
        while self.current_char is not None:
            if self.current_char in WHITESPACE:
                self.advance()
            elif self.current_char == "(":
                self.advance()
                yield Token(TokenType.LPAREN)
            elif self.current_char == ")":
                self.advance()
                yield Token(TokenType.RPAREN)
            else:
                # Everything up to the next parenthesis is lexed in one go.
                for piece in self.generate_expre():
                    if piece == "and":
                        yield Token(TokenType.AND)
                    elif piece == "or":
                        yield Token(TokenType.OR)
                    else:
                        yield Token(TokenType.CLAUSE, piece)

    def generate_expre(self):
        """Consume characters up to the next parenthesis and split them.

        Returns:
            A list of non-empty, stripped strings: clause texts interleaved
            with the keywords ``"and"`` / ``"or"`` in their original order.
        """
        chars = [self.current_char]
        self.advance()

        while self.current_char is not None and self.current_char not in "()":
            chars.append(self.current_char)
            self.advance()

        # The capturing group makes re.split keep the keywords in the result.
        pieces = re.split(self._KEYWORD_PATTERN, "".join(chars))
        stripped = (piece.strip() for piece in pieces)
        return [piece for piece in stripped if piece]
            

In [3]:
# Demo: tokenize a predicate that mixes parentheses, "and" and "or".
sample1 = "(year <2000  and year > 1997) or year = 1999 "
lexer = Lexer(sample1)
# generate_tokens() is a generator, so it is materialized with list() for printing.
tokens = lexer.generate_tokens()
print(list(tokens))

[LPAREN, CLAUSE(year <2000), AND, CLAUSE(year > 1997), RPAREN, OR, CLAUSE(year = 1999)]


In [4]:
from dataclasses import dataclass

@dataclass
class ClauseNode:
    """Leaf of the syntax tree: the text of a single clause."""

    # Clause text exactly as produced by the lexer.
    value: str

    def __repr__(self):
        """Render the node as its bare clause text."""
        return "{}".format(self.value)

@dataclass
class AndNode:
    """Syntax-tree node for ``node_a and node_b``."""

    # Operands are other syntax-tree nodes. Annotated as `object` because the
    # builtin function `any` is not a type.
    node_a: object
    node_b: object

    def __repr__(self):
        """Render as a parenthesised ``(a and b)`` expression."""
        return f"({self.node_a} and {self.node_b})"

@dataclass
class OrNode:
    """Syntax-tree node for ``node_a or node_b``."""

    # Operands are other syntax-tree nodes. Annotated as `object` because the
    # builtin function `any` is not a type.
    node_a: object
    node_b: object

    def __repr__(self):
        """Render as a parenthesised ``(a or b)`` expression."""
        return f"({self.node_a} or {self.node_b})"

In [5]:
class Parser:
    """Recursive-descent parser that turns a token stream into a syntax tree.

    Grammar as implemented:

        andOr  := clause (("and" | "or") clause)*
        clause := CLAUSE | "(" andOr ")"

    ``and`` and ``or`` share one precedence level and associate to the left,
    so "a or b and c" parses as "((a or b) and c)". Use parentheses to group
    differently.
    """

    def __init__(self, tokens):
        """Wrap ``tokens`` (any iterable of tokens) and load the first one."""
        self.tokens = iter(tokens)
        self.advance()

    def raise_error(self):
        """Abort parsing with the generic syntax error."""
        raise Exception("Invalid syntax")

    def advance(self):
        """Move to the next token; ``current_token`` is None at end of input."""
        self.current_token = next(self.tokens, None)

    def parse(self):
        """Parse the whole token stream.

        Returns:
            The root node of the syntax tree, or None for an empty stream.

        Raises:
            Exception: "Invalid syntax" if the tokens do not form a valid
                expression or tokens are left over after it.
        """
        if self.current_token is None:
            return None

        result = self.andOr()

        # Anything left over (e.g. a stray ")") is an error.
        if self.current_token is not None:
            self.raise_error()

        return result

    def andOr(self):
        """Parse a left-associative chain of clauses joined by and / or."""
        result = self.clause()

        while (self.current_token is not None
               and self.current_token.type in (TokenType.AND, TokenType.OR)):
            node_type = AndNode if self.current_token.type == TokenType.AND else OrNode
            self.advance()
            result = node_type(result, self.clause())

        return result

    def clause(self):
        """Parse a single clause or a parenthesised sub-expression."""
        token = self.current_token

        # End of input where an operand is required, e.g. "a and".
        if token is None:
            self.raise_error()

        if token.type == TokenType.LPAREN:
            self.advance()
            result = self.andOr()

            # Running out of tokens here means the ")" is missing.
            if self.current_token is None or self.current_token.type != TokenType.RPAREN:
                self.raise_error()

            self.advance()
            return result

        if token.type == TokenType.CLAUSE:
            self.advance()
            return ClauseNode(token.value)

        self.raise_error()

In [6]:
# Demo: lex and parse a predicate, then print the resulting syntax tree.
sample3 = "year <2000  and  (year > 1997 or year = 1999 )"
lexer = Lexer(sample3)
# The parser consumes the token generator lazily, one token at a time.
tokens = lexer.generate_tokens()
parser = Parser(tokens)
tree = parser.parse()
# The node __repr__ methods render the tree fully parenthesised.
print(tree)

(year <2000 and (year > 1997 or year = 1999))


In [7]:
from dataclasses import dataclass

@dataclass
class Clause:
    """Evaluation result: a predicate's text paired with its truth value."""

    # Text of the (sub-)predicate that was evaluated.
    value: str
    # Outcome of evaluating that predicate.
    judge: bool

    def __repr__(self):
        """Render as ``<text> is <True|False>``."""
        return "{} is {}".format(self.value, self.judge)

In [8]:
class Interpreter:
    """Evaluate a parsed predicate tree against one JSON tree node."""

    def __init__(self, jsonTreeNode):
        # Node that every clause of the predicate is tested against.
        self.jsonTreeNode = jsonTreeNode

    def visit(self, node):
        """Dispatch to the ``visit_<ClassName>`` method matching ``node``'s type."""
        handler = getattr(self, "visit_" + type(node).__name__)
        return handler(node)

    def visit_ClauseNode(self, node):
        """Evaluate a single clause with ``singlePredicate``."""
        verdict = singlePredicate(self.jsonTreeNode, node.value)
        return Clause(node.value, verdict)

    def visit_AndNode(self, node):
        """Evaluate ``a and b``; ``b`` is only visited when ``a`` holds."""
        label = f'( {node.node_a} and {node.node_b})'
        verdict = self.visit(node.node_a).judge and self.visit(node.node_b).judge
        return Clause(label, bool(verdict))

    def visit_OrNode(self, node):
        """Evaluate ``a or b``; ``b`` is only visited when ``a`` fails."""
        label = f'( {node.node_a} or {node.node_b})'
        verdict = self.visit(node.node_a).judge or self.visit(node.node_b).judge
        return Clause(label, bool(verdict))



In [9]:
def complexPredicate(node,complexPredicate):
    """Evaluate a compound predicate string against ``node``.

    The predicate is lexed, parsed into a syntax tree and interpreted; the
    ``judge`` field of the interpreter's result is returned.
    """
    token_stream = Lexer(complexPredicate).generate_tokens()
    syntax_tree = Parser(token_stream).parse()
    verdict = Interpreter(node).visit(syntax_tree)
    return verdict.judge

***

In [10]:
def _parseNumber(text):
    """Return ``text`` as a finite float, or None if it is not a number."""
    try:
        number = float(text)
    except ValueError:
        return None
    # float() also accepts "nan" and "inf"; keep those as ordinary strings so
    # that a textual value such as "nan" still compares equal to itself.
    if number != number or number in (float("inf"), float("-inf")):
        return None
    return number


def singlePredicate(node, predicate):
    """Evaluate one comparison predicate against the value of ``node``.

    Only the operator and the text to its right are used; whatever precedes
    the operator is ignored. The node's value (``node.getValue()``) is the
    left operand.

    Supported operators: ``>=``, ``<=``, ``!=``, ``>``, ``<``, ``=``.

    Comparison rules:
      * both operands numeric            -> compared as floats
      * node value numeric, other not    -> False
      * otherwise                        -> compared as strings

    Returns:
        bool: the outcome of the comparison; False if ``predicate`` contains
        no operator.
    """
    # TODO: circumstance 1 (position) is not handled yet.

    # Circumstance 2: evaluate value
    # Identify the operator: the leftmost one in the text wins, so operator
    # characters inside the comparison value (e.g. "title = a>b") are not
    # mistaken for the operator. Two-character operators are listed first and
    # therefore win a tie at the same position ("<=" over "<").
    operator = ''
    operatorIndex = -1
    for op in ('>=', '<=', '!=', '>', '<', '='):
        index = predicate.find(op)
        if index != -1 and (operatorIndex == -1 or index < operatorIndex):
            operator, operatorIndex = op, index

    if not operator:
        return False

    cmpVal = predicate[operatorIndex + len(operator):].strip()

    # Compare values / operands. float() parsing (instead of str.isnumeric)
    # lets decimals and negative numbers be compared numerically as well.
    selfVal = str(node.getValue())
    selfNum = _parseNumber(selfVal)
    if selfNum is not None:
        cmpNum = _parseNumber(cmpVal)
        if cmpNum is None:
            return False
        selfVal, cmpVal = selfNum, cmpNum

    comparisons = {
        '>': lambda a, b: a > b,
        '<': lambda a, b: a < b,
        '=': lambda a, b: a == b,
        '>=': lambda a, b: a >= b,
        '<=': lambda a, b: a <= b,
        '!=': lambda a, b: a != b,
    }
    return comparisons[operator](selfVal, cmpVal)

In [11]:
# The input of buildTree is a dictionary
def buildTree(data):
    """Build a jsonTreeNode tree from a dictionary and return its root.

    The first key of ``data`` names the root node; the value stored under
    that key describes the root's content (children or a plain value).
    """
    root_name = tuple(data.keys())[0]

    # The root is created without a parent, then filled in recursively.
    root_node = jsonTreeNode(root_name, None)
    modifyNode(root_node, data[root_name])
    return root_node
    
    
# The data for a node may be one of the following:

# 1. A basic element without structure, such as a string/integer/float
#    -> this node is a leaf node
#    -> we need to set node.value

# 2. A dictionary
#    -> this node has child nodes
#    -> we need to set node.child
# (1) { child_name: [...] }, a group of siblings with the same name, e.g. album, artist, song, genre
# (2) { child_name1: ..., child_name2: ..., ... }, a group of siblings with different names
# (3) a mix of (1) and (2), { child_name1: [...], child_name2: ..., child_name3: ..., ... }

def modifyNode(node, data):
    """Fill ``node`` from ``data``: children for a dict, a plain value otherwise."""
    if not isinstance(data, dict):
        # Anything that is not a dictionary is stored as the node's value.
        node.setValue(data)
        return
    setChild(node, data)

# data is a dictionary
def setChild(node, data):
    """Create one child node per entry of the dictionary ``data`` and attach them.

    A list value produces several children that share the key as their name;
    any other value produces a single child.
    """
    children = []
    for name, payload in data.items():
        if not isinstance(payload, list):
            child = jsonTreeNode(name, node)
            modifyNode(child, payload)
            children.append(child)
        else:
            # Multiple child nodes with the same name.
            children += sameName(parent=node, name=name, data=payload)
    node.setChild(children)
    
    
# "data" is a list 
def sameName(parent,name,data):
    """Return one node called ``name`` under ``parent`` for each element of the list ``data``."""
    def build(element):
        sibling = jsonTreeNode(name, parent)
        modifyNode(sibling, element)
        return sibling

    return [build(element) for element in data]


class jsonTreeNode:
    """Node of a tree built from nested JSON-like data.

    Each node has a name, a reference to its parent (None for the root), a
    list of child nodes and an optional value.
    """

    def __init__(self, name, parent = None):
        self.name = name
        self.parent = parent
        self.child = []
        self.value = None

    def __repr__(self):
        return f"<{self.name}>"

    # Set
    def setValue(self, value):
        """Store ``value`` on this node."""
        self.value = value

    def setChild(self, child):
        """Replace this node's list of children with ``child``."""
        self.child = child

    # Get
    def getName(self):
        return self.name

    def getParent(self):
        return self.parent

    def getChild(self):
        return self.child

    def getValue(self):
        return self.value

    def getSibling(self):
        """Return the other children of this node's parent.

        A node without a parent has no siblings, so the result is an empty
        list rather than an AttributeError on ``None.child``.
        """
        if self.parent is None:
            return []
        return [sibling for sibling in self.parent.child if sibling is not self]

    # Judgement -> True/False
    def isRoot(self):
        """True when the node has no parent."""
        return self.parent is None

    def haveChild(self):
        """True when the node has at least one child."""
        return bool(self.child)

    def haveValue(self):
        """True when a value other than None is stored on the node."""
        return self.value is not None

    def haveSibling(self):
        """True when the node's parent has at least one other child."""
        return bool(self.getSibling())
    


In [12]:
# Sample data: a library of two albums, used to exercise buildTree.
library = {
    'library': {
        'album': [
            {
                'title': 'Bua Hati',
                'artists': {
                    'artist': [
                        {'name': 'Anang Ashanty', 'country': 'Indonesia'},
                        {'name': 'Kris Dayanti', 'country': 'Indonesia'},
                    ],
                },
                'songs': {
                    'song': [
                        {'title': 'Timang-Timang', 'duration': '5:13'},
                        {'title': 'Miliki Diriku', 'duration': '5:35'},
                        {'title': 'Bua Hati', 'duration': '5:07'},
                    ],
                },
                'genres': {'genre': ['Pop', 'World']},
                'year': 1998,
            },
            {
                'title': 'Separuh Jiwaku Pergi',
                # Here the single artist is a plain dict, not a one-element list.
                'artists': {'artist': {'name': 'Anang Ashanty', 'country': 'Indonesia'}},
                'songs': {
                    'song': [
                        {'title': 'Separuh Jiwaku Pergi', 'duration': '5:00'},
                        {'title': 'Belajarlah Untuk Cinta', 'duration': '5:23'},
                        {'title': 'Hujanpun Menangis', 'duration': '4:17'},
                    ],
                },
                'genres': {'genre': ['Pop', 'World']},
                'year': 1998,
            },
        ],
    },
}

In [13]:
# Demo: evaluate a compound predicate against a single leaf of the tree.
root1 = buildTree(library)
# First album -> its last child, i.e. the <year> leaf whose value is 1998.
year1 = root1.getChild()[0].getChild()[-1]
sample4 = "child::year<=1990 or (child::year < 1995 and child::year<=2000)"
# 1998 satisfies neither side of the "or", so this prints False.
print(complexPredicate(year1, sample4))

False


In [14]:
# Demo: the same evaluation as complexPredicate, run step by step so the
# intermediate Clause result (predicate text plus verdict) can be printed.
print(root1)
lexer = Lexer(sample4)
tokens = lexer.generate_tokens()
parser = Parser(tokens)
tree = parser.parse()
interpreter = Interpreter(year1)
result = interpreter.visit(tree)
print(result)

<library>
( child::year<=1990 or (child::year < 1995 and child::year<=2000)) is False
