In [1]:
# !pip install lark-parser

In [112]:
from lark import Lark, Transformer, Tree, Token


# Lark grammar for the subset of the HOA automaton text format used in this notebook.
#
# Structure, as written in the rules below:
#   * start   - a header, the literal "--BODY--", a body, then the literal "--END--".
#   * header  - version, optional tool, start state, acc-name, acceptance,
#               optional properties and the AP declaration, in exactly that order.
#   * acc_name only accepts the literal "generalized-Buchi" followed by an integer.
#   * body    - zero or more states, each with zero or more transitions of the
#               form "[label] destination {acceptance sets}" (the braces part is optional).
#   * label expressions are built from integers, identifiers, "!", "&", "|" and
#     parentheses; "&" and "|" are not given separate precedence levels here.
#
# NOTE(review): the STRING terminal is declared but no rule in this grammar refers to it.
hoa_grammar = r"""
    start: header "--BODY--" body "--END--"
    
    header: version tool? start_state acc_name acceptance properties? ap_decl

    version: "HOA:" /v\d+/
    tool: "tool:" ESCAPED_STRING*
    start_state: "Start:" INT
    acc_name: "acc-name:" "generalized-Buchi" INT
    acceptance: "Acceptance:" INT acceptance_cond*
    properties: ("properties:" LOWER_STRING+)+
    ap_decl: "AP:" INT (ESCAPED_STRING)*

    acceptance_cond: "Inf(" INT ")" ("&" "Inf(" INT ")")*

    body: state*
    state: "State:" INT transition* 
    transition: "[" label_expr "]" INT acc_sig?

    label_expr: expr
    expr: factor (LOGIC_OP factor)*
    factor: LOGIC_NOT factor
          | L_PAR expr R_PAR
          | INT
          | IDENTIFIER
    acc_sig: "{" INT+ "}"
    
    IDENTIFIER: /[a-zA-Z_][0-9a-zA-Z_-]*/
    STRING: /[a-zA-Z_-]+/
    LOWER_STRING: /[0-9a-z_-]+/
    LOGIC_OP: "&" | "|"
    LOGIC_NOT: "!"
    L_PAR: "("
    R_PAR: ")"
    
    %import common.ESCAPED_STRING
    %import common.INT
    %import common.WS
    %ignore WS
"""

class HOA_Transformer(Transformer):
    """Convert the parse tree of an HOA document into nested dicts and lists.

    Only the rules that have a method on this class are rewritten
    (``start``, ``header``, ``body``, ``state``, ``transition``,
    ``acceptance_cond`` and ``ap_decl``). Every other rule (``version``,
    ``tool``, ``start_state``, ``acc_name``, ``acceptance``, ``properties``,
    ``label_expr``, ``acc_sig`` ...) is left as a ``Tree`` of ``Token`` objects.
    """

    # Keys of the header dict, in the order the grammar lists the header parts.
    # Missing optional parts (``tool``, ``properties``) are reported as ``None``.
    _HEADER_FIELDS = (
        'version',
        'tool',
        'start_state',
        'acc_name',
        'acceptance',
        'properties',
        'ap_decl',
    )

    def start(self, items):
        """Return ``{'header': ..., 'body': ...}`` from the two children of ``start``."""
        return {
            'header': items[0],
            'body': items[1]
        }

    def header(self, items):
        """Collect the header parts into a dict keyed by rule name.

        ``tool`` and ``properties`` are optional in the grammar, so the number
        and position of children varies between documents. Children are
        therefore identified by their rule name (``Tree.data``) instead of by
        index; the only non-``Tree`` child is the dict produced by ``ap_decl``.

        Returns:
            dict with the keys listed in ``_HEADER_FIELDS``; absent optional
            parts map to ``None``.
        """
        fields = dict.fromkeys(self._HEADER_FIELDS)
        for item in items:
            if isinstance(item, Tree):
                # Untransformed header rule: its rule name is the dict key.
                fields[str(item.data)] = item
            else:
                # ``ap_decl`` has already been turned into a plain dict.
                fields['ap_decl'] = item
        return fields

    def body(self, items):
        """Wrap the list of transformed states as ``{'states': [...]}``."""
        return {'states': items}

    def state(self, items):
        """Split a state into its id token and the list of its transitions."""
        return {
            'state_id': items[0],
            'transitions': items[1:]
        }

    def transition(self, items):
        """Return label, destination and (optional) acceptance signature.

        ``acc_sig`` is ``None`` when the transition carries no ``{...}`` part.
        """
        return {
            'label': items[0],
            'destination': items[1],
            'acc_sig': items[2] if len(items) > 2 else None
        }

    def acceptance_cond(self, items):
        """Return the children of one acceptance condition as plain strings."""
        return [str(item) for item in items]

    def ap_decl(self, items):
        """Return the declared proposition count and a name -> index mapping.

        Double quotes are removed from each proposition name; indices are the
        zero-based positions of the names in the declaration.
        """
        count, *names = items  # first child is the number of atomic propositions
        propositions = {
            name.replace('"', ''): index
            for index, name in enumerate(names)
        }
        return {'count': count, 'propositions': propositions}

# Build the LALR parser. The transformer is handed to Lark directly, and the
# printed result of parse() below is the dict produced by HOA_Transformer.start.
hoa_parser = Lark(hoa_grammar, parser='lalr', transformer=HOA_Transformer())


# Sample HOA document used to exercise the parser in the cells that follow.
hoa_input = '''
HOA: v1
tool: "owl" "* *"
Start: 0
acc-name: generalized-Buchi 1
Acceptance: 1 Inf(0) Inf(7)
properties: trans-acc trans-label
AP: 2 "a" "b"
--BODY--
State: 0
[!0 & 1] 0
[0 & 1] 1 {0}
State: 1
[1] 1 {0 1 2 3}
State: 4
[t] 4 {0}
--END--
'''

# Parse the sample; `parsed_hoa` is reused by the helper classes in later cells.
parsed_hoa = hoa_parser.parse(hoa_input)
print(parsed_hoa)

{'header': {'version': Tree('version', [Token('__ANON_3', 'v1')]), 'tool': Tree('tool', [Token('ESCAPED_STRING', '"owl"'), Token('ESCAPED_STRING', '"* *"')]), 'start_state': Tree('start_state', [Token('INT', '0')]), 'acc_name': Tree('acc_name', [Token('INT', '1')]), 'acceptance': Tree('acceptance', [Token('INT', '1'), ['0'], ['7']]), 'properties': Tree('properties', [Token('LOWER_STRING', 'trans-acc'), Token('LOWER_STRING', 'trans-label')]), 'ap_decl': {'count': Token('INT', '2'), 'propositions': {'a': 0, 'b': 1}}}, 'body': {'states': [{'state_id': Token('INT', '0'), 'transitions': [{'label': Tree('label_expr', [Tree('expr', [Tree('factor', [Token('LOGIC_NOT', '!'), Tree('factor', [Token('INT', '0')])]), Token('LOGIC_OP', '&'), Tree('factor', [Token('INT', '1')])])]), 'destination': Token('INT', '0'), 'acc_sig': None}, {'label': Tree('label_expr', [Tree('expr', [Tree('factor', [Token('INT', '0')]), Token('LOGIC_OP', '&'), Tree('factor', [Token('INT', '1')])])]), 'destination': Token('I

In [119]:
from dataclasses import dataclass

@dataclass
class HOAAutomataTransition:
    """One outgoing edge of an automaton state.

    A label equal to ``"t"`` is replaced by ``"epsilon"`` right after
    construction. ``str()`` renders ``"<label> -> (<destination>)"`` and, when
    the acceptance signature is non-empty, appends ``" ACC[...]"`` with the
    entries joined by ``", "``.
    """
    label: str
    destination: str
    accepting_signature: list[str]

    def __post_init__(self):
        # Rewrite the "t" label to the name used when printing.
        if self.label == "t":
            self.label = "epsilon"

    def __str__(self):
        rendered = f"{self.label} -> ({self.destination})"
        if self.accepting_signature:
            joined_sets = ', '.join(self.accepting_signature)
            rendered += f" ACC[{joined_sets}]"
        return rendered
    

@dataclass
class HOAAutomataState:
    """A state id together with its outgoing transitions.

    ``str()`` prints the id in parentheses, left-aligned in a six-character
    column, followed by one ``"-> <transition>"`` entry per line; continuation
    lines are indented by six spaces so the arrows line up.
    """
    state_id: str
    transitions: list[HOAAutomataTransition]

    def __str__(self):
        id_column = ('(' + self.state_id + ')').ljust(6)
        line_break = "\n" + " " * 6
        arrows = [f"-> {edge}" for edge in self.transitions]
        return id_column + line_break.join(arrows)
    

In [120]:
class HOAParsedHeaderHelper:
    """Static accessors that pull individual facts out of a parsed header.

    Every method takes the full parse result (the dict with a ``'header'``
    key) and reads only from ``parsed_tree['header']``.
    """

    @staticmethod
    def extract_start_state_id(parsed_tree):
        """Return the ``value`` of the first child of the ``start_state`` node."""
        start_node = parsed_tree['header']['start_state']
        return start_node.children[0].value

    @staticmethod
    def extract_accepting_sink_sets_id(parsed_tree):
        """Return each acceptance condition as a list.

        The first child of the ``acceptance`` node (the integer that follows
        ``Acceptance:`` in the grammar) is skipped; every remaining child is
        copied into a new list.
        """
        acceptance_children = parsed_tree['header']['acceptance'].children
        collected = []
        for condition in acceptance_children[1:]:
            collected.append(list(condition))
        return collected

    @staticmethod
    def extract_atomic_propositions_to_symbol(parsed_tree):
        """Return the ``propositions`` mapping stored under ``ap_decl``."""
        header = parsed_tree['header']
        return header['ap_decl']['propositions']

    @staticmethod
    def extract_useful_header_info(parsed_tree):
        """Bundle the three extractions above into a single dict."""
        helper = HOAParsedHeaderHelper
        start_state_id = helper.extract_start_state_id(parsed_tree)
        accepting_sets = helper.extract_accepting_sink_sets_id(parsed_tree)
        propositions = helper.extract_atomic_propositions_to_symbol(parsed_tree)
        return {
            'start_state_id': start_state_id,
            'accepting_sink_sets_id': accepting_sets,
            'atomic_propositions_to_symbol': propositions
        }


# Display the header summary extracted from the sample parse result.
HOAParsedHeaderHelper.extract_useful_header_info(parsed_hoa)

{'start_state_id': '0',
 'accepting_sink_sets_id': [['0'], ['7']],
 'atomic_propositions_to_symbol': {'a': 0, 'b': 1}}

In [121]:
class HOAParsedBodyHelper:
    """Static helpers that turn the parsed body into automaton dataclasses.

    The entry point is ``extract_states``, which reads
    ``parsed_tree['body']['states']`` and returns ``HOAAutomataState`` objects.
    """

    @staticmethod
    def _extract_state_id(parsed_state):
        """Return the ``value`` of the state's id token."""
        return parsed_state['state_id'].value

    @staticmethod
    def _label_walk_helper(transition_label):
        """
        Recursively walks through the parsed tree of a transition label
        and converts it into a string representation.

        ``Tree`` nodes contribute the concatenation (no separator) of their
        children, ``Token`` leaves contribute their ``value``, and anything
        else is passed through ``str()``.
        """
        if isinstance(transition_label, Tree):
            return "".join(
                HOAParsedBodyHelper._label_walk_helper(child)
                for child in transition_label.children
            )
        if isinstance(transition_label, Token):
            return transition_label.value
        return str(transition_label)

    @staticmethod
    def _extract_acc_sig(acc_sig):
        """Return the acceptance-set ids of a transition as a list of values.

        ``None`` (no ``{...}`` on the transition) yields an empty list.
        """
        if acc_sig is None:
            return []
        return [child.value for child in acc_sig.children]

    @staticmethod
    def extract_transitions(state_transitions):
        """Build one ``HOAAutomataTransition`` per parsed transition dict.

        The destination is converted with ``str()`` so the dataclass holds a
        plain string, matching how ``state_id`` is unwrapped and the
        ``destination: str`` field declaration, instead of a parser token.
        """
        return [
            HOAAutomataTransition(
                label=HOAParsedBodyHelper._label_walk_helper(tr['label']),
                destination=str(tr['destination']),
                accepting_signature=HOAParsedBodyHelper._extract_acc_sig(tr['acc_sig'])
            )
            for tr in state_transitions
        ]

    @staticmethod
    def extract_states(parsed_tree):
        """Return the body's states as a list of ``HOAAutomataState`` objects."""
        return [
            HOAAutomataState(
                state_id=HOAParsedBodyHelper._extract_state_id(st),
                transitions=HOAParsedBodyHelper.extract_transitions(st['transitions'])
            )
            for st in parsed_tree['body']['states']
        ]
    
# Print every state of the sample automaton via HOAAutomataState.__str__.
for st in HOAParsedBodyHelper.extract_states(parsed_hoa):
    print(st)

(0)   -> !0&1 -> (0)
      -> 0&1 -> (1) ACC[0]
(1)   -> 1 -> (1) ACC[0, 1, 2, 3]
(4)   -> epsilon -> (4) ACC[0]
