In [1]:
import string
import matplotlib.pyplot as plt

In [17]:
TOKENS = [
    ('LPAREN', '('),
    ('RPAREN', ')'),
    ('UNION', r'\cup'),
    ('INTERSECTION', r'\cap'),
    ('AND', r'\wedge'),
    ('OR', r'\vee'),
    ('NOT', r'\not'),
    ('IMPLIES', r'\implies'),
    ('CONTAINS', r'\in'),
    ('EXISTS', r'\exists'),
    ('FORALL', r'\forall'),
    ('SUBSET', r'\subset'),
    ('NOT', r'\neg'),
    ###################
    ('FORALL', '∀'),
    ('EXISTS', '∃'),
    ('NOT', '¬'),
    ('CONTAINS', '∈'),
    ('AND', '∧'),
    ('IMPLIES', '⇒'),
    ('SUBSET', '⊂'),
]

def tokenize(formula):
    formula = '(%s)' % formula
    def helper():
        i = 0
        while i < len(formula):
            if formula[i] == ' ':
                i += 1
                continue
            for t, v in TOKENS:
                if formula[i:i+len(v)] == v:
                    yield (t, None)
                    i += len(v)
                    break
            else:
                assert formula[i] in string.ascii_lowercase, (i, formula[i])
                yield ('VARIABLE', formula[i])
                i += 1
    return list(helper())

In [18]:
BINOPS = ['OR', 'AND', 'IMPLIES', 'CONTAINS', 'SUBSET']
QUANTS = ['FORALL', 'EXISTS']
PRINT_SYMS = {
    'OR': '∨',
    'AND': '∧',
    'IMPLIES': '⇒',
    'FORALL': '∀',
    'EXISTS': '∃',
    'CONTAINS': '∈',
    'SUBSET': '⊂',
    'NOT': '¬',
}

# VERBOSE = True # needed for idempotence
VERBOSE = False

class ASTNode:
    def __init__(self, node_type, node_value=None):
        self.t = node_type
        self.v = node_value

    def add_child(self, node):
        self.v.append(node)
    
    def __repr__(self):
        if isinstance(self.v, list):
            if self.t in BINOPS:
                return '(%s %s %s)' % (self.v[0], PRINT_SYMS[self.t], self.v[1])
            if self.t in QUANTS:
                if VERBOSE:
                    return '(%s%s%s)' % (PRINT_SYMS[self.t], self.v[0], self.v[1])
                else:
                    return '%s%s%s' % (PRINT_SYMS[self.t], self.v[0], self.v[1])
            if self.t == 'NOT':
                if VERBOSE:
                    return '(%s%s)' % (PRINT_SYMS[self.t], self.v[0])
                else:
                    return '%s%s' % (PRINT_SYMS[self.t], self.v[0])
            else:
                return '%s(%s)' % (self.t, ' '.join(str(n) for n in self.v))
        else:
            return str(self.v if self.t == 'VARIABLE' else self.t)

def parse(tokens):
    def parse_expression(index):
        if tokens[index][0] == 'LPAREN':
            index += 1
            node = ASTNode('GROUP', [])
            while tokens[index][0] != 'RPAREN':
                child_node, index = parse_expression(index)
                node.add_child(child_node)
            return node, index + 1  # Skip RPAREN
        else:
            return ASTNode(*tokens[index]), index+1

    ast_root, _ = parse_expression(0)
    return ast_root

In [19]:
# infers the proper grouping for not and forall/exists
def group_terms(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        
        def group_at(i): # tries to form a group at index i
            if i >= len(children)-1:
                return
            
            if children[i].t == 'NOT':
                op_node = children.pop(i)
                group_at(i)
                a_node = children.pop(i)
                assert a_node.t == 'GROUP', a_node
                children.insert(i, ASTNode('GROUP', [op_node, a_node]))
                
            if children[i].t in ['FORALL', 'EXISTS']:
                op_node = children.pop(i)
                x_node = children.pop(i)
                assert x_node.t == 'VARIABLE'
                group_at(i)
                phi_node = children.pop(i)
                assert phi_node.t == 'GROUP'
                children.insert(i, ASTNode('GROUP', [op_node, x_node, phi_node]))
                
        i = 0
        while i < len(children):
            group_at(i)
            i += 1
        
        if len(children) == 1:
            return children[0] # eliminate unary groups
        else:
            node.v = children
            return node
        
    return helper(node)

In [20]:
def transform_noop(node):
    if not isinstance(node.v, list):
        return node
    children = node.v = [transform_noop(n) for n in node.v]
    return node
    
def extract_quantifiers(node):
    if not isinstance(node.v, list):
        return node
    children = node.v = [extract_quantifiers(n) for n in node.v]
    if node.t == 'GROUP':
        if children[0].t in ['FORALL', 'EXISTS'] and not children[0].v:
            assert children[1].t == 'VARIABLE', children
            assert len(children) == 3
            node.t = children[0].t
            node.v = [children[1], children[2]]
    return node

def extract_binops(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'GROUP':
            if children[1].t in BINOPS and not children[1].v:
                assert len(children) == 3
                node.t = children[1].t
                node.v = [children[0], children[2]]
        return node
    return helper(node)

def extract_not(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'GROUP':
            if children[0].t == 'NOT' and not children[0].v:
                assert len(children) == 2, children
                node.t = children[0].t
                node.v = [children[1]]
        return node
    return helper(node)

In [21]:
def tag_logic(node):
    def helper(node):
        if not isinstance(node.v, list):
            assert node.t == 'VARIABLE', node
            node.is_logic = False
            return node
        for n in node.v:
            helper(n)
        if node.t in ['NOT', 'AND', 'OR', 'IMPLIES']:
            assert all(n.is_logic for n in node.v)
            node.is_logic = True
        elif node.t in ['EXISTS', 'FORALL']:
            assert node.v[1].is_logic # we already know v[0] is VARIABLE
            node.is_logic = True
        elif node.t == 'VARIABLE':
            node.is_logic = False
        elif node.t in ['CONTAINS', 'SUBSET']:
            assert all(n.t == 'VARIABLE' for n in node.v)
            node.is_logic = True
        else:
            assert False
            
    return helper(node)

In [22]:
def process(tree):
    tree = group_terms(tree)
    tree = extract_quantifiers(tree)
    tree = extract_binops(tree)
    tree = extract_not(tree)
    tag_logic(tree)
    return tree

In [23]:
def compile_subset(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'SUBSET':
            a, b = children
            imp = ASTNode('IMPLIES', [ASTNode('CONTAINS', [ASTNode('VARIABLE', 'z'), a]),
                                            ASTNode('CONTAINS', [ASTNode('VARIABLE', 'z'), b])])
            node = ASTNode('FORALL', [ASTNode('VARIABLE', 'z'), imp])
        return node
    return helper(node)

def compile_logic(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'OR':
            a, b = children
            node = ASTNode('NOT', [ASTNode('AND', [ASTNode('NOT', [a]), ASTNode('NOT', [b])])])
        if node.t == 'IMPLIES':
            a, b = children
            node = ASTNode('NOT', [ASTNode('AND', [a, ASTNode('NOT', [b])])])
        return node
    return helper(node)

def compile_exists(node):
    def helper(node):
        if not isinstance(node.v, list):
            return node
        children = node.v = [helper(n) for n in node.v]
        if node.t == 'EXISTS':
            x, phi = children
            node = ASTNode('NOT', [ASTNode('FORALL', [x, ASTNode('NOT', [phi])])])
        return node
    return helper(node)

def compile_ast(tree):
    tree = compile_subset(tree)
    tree = compile_logic(tree)
    tree = compile_exists(tree)
    return tree

In [30]:
# Axiom of the empty set
# formula = r'\exists x (\forall y (\not (y \in x)))'
formula = r'∃x∀y¬(y ∈ x)'

# Theorem: every set contains the empty set
# formula = r'\forall x ((\forall y (\not (y \in x))) \implies (\forall a (x \subset a)))'
# formula = r'∀x(∀y¬(y ∈ x) ⇒ ∀a(x ⊂ a))'

# Axiom of foundation
formula = r'∀x∀y \not ((x \in y) \wedge (y \in x))'

# Axiom of powerset
formula = r'∀x∃y∀z((z \subset x) \implies (z \in y))'

print(formula)

# formula = r'(a \in b) \vee (b \in a)'

tokens = tokenize(formula)
tree = parse(tokens)
tree = process(tree)
print(tree)
tree = compile_ast(tree)
print(tree)

∀x∃y∀z((z \subset x) \implies (z \in y))
∀x∃y∀z((z ⊂ x) ⇒ (z ∈ y))
∀x¬∀y¬∀z¬(∀z¬((z ∈ z) ∧ ¬(z ∈ x)) ∧ ¬(z ∈ y))
