In [31]:
import string
import matplotlib.pyplot as plt

In [744]:
TOKENS = [
    ('LPAREN', '('),
    ('RPAREN', ')'),
    ('UNION', r'\cup'),
    ('INTERSECTION', r'\cap'),
    ('LOGIC_AND', r'\wedge'),
    ('LOGIC_OR', r'\vee'),
    ('LOGIC_NOT', r'\not'),
    ('LOGIC_IMPLIES', r'\implies'),
    ('CONTAINS', r'\in'),
    ('EXISTS', r'\exists'),
    ('FORALL', r'\forall'),
    ('SUBSET', '\subset'),
    ###################
    ('FORALL', '∀'),
    ('EXISTS', '∃'),
]

def tokenize(formula):
    formula = '(%s)' % formula
    def helper():
        i = 0
        while i < len(formula):
            if formula[i] == ' ':
                i += 1
                continue
            for t, v in TOKENS:
                if formula[i:i+len(v)] == v:
                    yield (t, None)
                    i += len(v)
                    break
            else:
                assert formula[i] in string.ascii_lowercase, (i, formula[i])
                yield ('VARIABLE', formula[i])
                i += 1
    return list(helper())

In [746]:
BINOPS = ['LOGIC_OR', 'LOGIC_AND', 'LOGIC_IMPLIES', 'CONTAINS', 'SUBSET']
QUANTS = ['FORALL', 'EXISTS']
PRINT_SYMS = {
    'LOGIC_OR': '∨',
    'LOGIC_AND': '∧',
    'LOGIC_IMPLIES': '⇒',
    'FORALL': '∀',
    'EXISTS': '∃',
    'CONTAINS': '∈',
    'SUBSET': '⊂',
    'LOGIC_NOT': '¬',
}

class ASTNode:
    def __init__(self, node_type, node_value):
        self.t = node_type
        self.v = node_value

    def add_child(self, node):
        self.v.append(node)
    
    def __repr__(self):
        if isinstance(self.v, list):
            if self.t in BINOPS:
                return '(%s %s %s)' % (self.v[0], PRINT_SYMS[self.t], self.v[1])
            if self.t in QUANTS:
                return '%s%s%s' % (PRINT_SYMS[self.t], self.v[0], self.v[1])
            if self.t == 'LOGIC_NOT':
                return '%s%s' % (PRINT_SYMS[''])
            return '%s(%s)' % (self.t, ' '.join(str(c) for c in self.v))
        else:
            return self.t if self.t != 'VARIABLE' else self.v

def parse(tokens):
    def parse_expression(index):
        if tokens[index][0] == 'LPAREN':
            index += 1
            node = ASTNode('GROUP', [])
            while tokens[index][0] != 'RPAREN':
                child_node, index = parse_expression(index)
                node.add_child(child_node)
            return node, index + 1  # Skip RPAREN
        else:
            return ASTNode(*tokens[index]), index+1

    ast_root, _ = parse_expression(0)
    return ast_root

In [788]:
def transform_noop(node):
    if not isinstance(node.v, list):
        return node
    children = node.v = [transform_noop(n) for n in node.v]
    return node

def eliminate_unary_groups(node):
    if not isinstance(node.v, list):
        return node
    children = node.v = [eliminate_unary_groups(n) for n in node.v]
    if node.t == 'GROUP':
        if children[0].t == 'GROUP' and len(children) == 1:
            node.t = children[0].t
            node.v = children[0].v
    return node
    
def extract_quantifiers(node):
    if not isinstance(node.v, list):
        return node
    children = node.v = [extract_quantifiers(n) for n in node.v]
    if node.t == 'GROUP':
        if children[0].t in ['FORALL', 'EXISTS'] and not children[0].v:
            assert children[1].t == 'VARIABLE', children
            assert len(children) == 3
            node.t = children[0].t
            node.v = [children[1], children[2]]
    return node

def extract_binops(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'GROUP':
            if children[1].t in BINOPS and not children[1].v:
                assert len(children) == 3
                node.t = children[1].t
                node.v = [children[0], children[2]]
        return node
    return helper(node)

def extract_not(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'GROUP':
            if children[0].t == 'LOGIC_NOT' and not children[0].v:
                assert len(children) == 2, children
                node.t = children[0].t
                node.v = [children[1]]
        return node
    return helper(node)

In [789]:
def tag_logic(node):
    def helper(node):
        if not isinstance(node.v, list):
            assert node.t == 'VARIABLE', node
            node.is_logic = False
            return node
        for n in node.v:
            helper(n)
        if node.t in ['LOGIC_NOT', 'LOGIC_AND', 'LOGIC_OR', 'LOGIC_IMPLIES']:
            assert all(n.is_logic for n in node.v)
            node.is_logic = True
        elif node.t in ['EXISTS', 'FORALL']:
            assert node.v[1].is_logic # we already know v[0] is VARIABLE
            node.is_logic = True
        elif node.t == 'VARIABLE':
            node.is_logic = False
        elif node.t in ['CONTAINS', 'SUBSET']:
            assert all(n.t == 'VARIABLE' for n in node.v)
            node.is_logic = True
        else:
            assert False
            
    return helper(node)

In [790]:
def process(tree):
    tree = eliminate_unary_groups(tree)
    tree = extract_quantifiers(tree)
    tree = extract_binops(tree)
    tree = extract_not(tree)
#     tree = extract_contains(tree)
#     tag_logic(tree)
    return tree

In [791]:
# formula = r'\exists x (\forall y (\not (y \in x)))' # exists varnothing

# formula = r'\exists x (\forall y ((y \in x) \implies (\forall a ((y \in a) \implies (a \in x)))))'

# formula = r'((b \in y) \vee (a \in z))'

formula = r'\forall x ((\forall y (\not (y \in x))) \implies (\forall y (x \subset y)))'

tokens = tokenize(formula)
tree = parse(tokens)
tree = process(tree)
tree

∀x(∀yLOGIC_NOT((y ∈ x)) ⇒ ∀y(x ⊂ y))

In [561]:
# formula = r'x \cup y'
# formula = r'x \cup (y \cap (z \cup a))'
# formula = r'\not ((\not a) \vee b)'
# formula = r'\not (x \in y)'
formula = r'\exists x (\forall y (\not (y \in x)))'
# formula = '(%s)' % formula

tokens = tokenize(formula)
tree = parse(tokens)
tree = process(tree)

In [439]:
tree.v[0]

GROUP(LOGIC_NOT GROUP(VARIABLE CONTAINS VARIABLE))

In [427]:
extract_logic(tree.v[1].v[1])

GROUP(LOGIC_NOT GROUP(VARIABLE CONTAINS VARIABLE))
GROUP(VARIABLE CONTAINS VARIABLE)


LOGIC_NOT(GROUP(VARIABLE CONTAINS VARIABLE))

In [426]:
tree.v[1].v[1]

GROUP(LOGIC_NOT GROUP(VARIABLE CONTAINS VARIABLE))