In [111]:
from lark import Lark, Transformer, Tree, Token
from dataclasses import dataclass, field
from typing import List, Optional, Any, Dict

In [112]:
hoa_grammar = r"""
    start: header "--BODY--" body "--END--"

    header: header_item*
    header_item: version
               | properties
               | state_count
               | start_state
               | atomic_prepositions
               | acceptance
               | tool
               | name
               | owl_args
               | acc_name

    version: "HOA:" /v\d+/
    tool: "tool:" ESCAPED_STRING+
    name: "name:" ESCAPED_STRING
    owl_args: "owlArgs:" ESCAPED_STRING+
    start_state: "Start:" INT
    acc_name: "acc-name:" ("generalized-Buchi" INT | "Buchi")
    acceptance: "Acceptance:" INT acceptance_cond*
    properties: ("properties:" PROPERTY+)+
    atomic_prepositions: "AP:" INT (ESCAPED_STRING)*
    state_count: "States:" INT

    acceptance_cond: "Inf(" INT ")" ("&" "Inf(" INT ")")*

    body: state_chunk*
    state_chunk: state_name edge*
    state_name: "State:" label? INT ESCAPED_STRING? acc_sig?
    edge: label? INT acc_sig?
    label: "[" label_expr "]"
    label_expr: boolean
              | INT
              | IDENTIFIER
              | LOGIC_NOT label_expr
              | L_PAR label_expr R_PAR
              | label_expr LOGIC_OP label_expr
    boolean: "true" | "false" | "t" | "f"
    acc_sig: "{" INT* "}"


    IDENTIFIER: /[a-zA-Z_][0-9a-zA-Z_-]*/
    STRING: /[a-zA-Z_-]+/
    PROPERTY: "state-labels" | "trans-labels" | "implicit-labels" | "explicit-labels"
             | "state-acc" | "trans-acc" | "univ-branch" | "no-univ-branch"
             | "deterministic" | "complete" | "unambiguous" | "stutter-invariant"
             | "weak" | "very-weak" | "inherently-weak" | "terminal" | "tight"
             | "colored"
    LOGIC_OP: "&" | "|"
    LOGIC_NOT: "!"
    L_PAR: "("
    R_PAR: ")"

    %import common.ESCAPED_STRING
    %import common.INT
    %import common.WS
    %ignore WS
"""


In [113]:
class HOA_Transformer(Transformer):
    def start(self, items):
        return {
            'header': items[0],
            'body': items[1]
        }

    def header_item(self, items):
        return items[0]

    def header(self, items):
        result = {
            'version': None,
            'tool': None,
            'name': None,
            'owl_args': None,
            'start_state': None,
            'acc_name': None,
            'acceptance': None,
            'properties': None,
            'ap_decl': None,
            'state_count': None
        }
        for subdict in items:
            for key, val in subdict.items():
                result[key] = val
        return result

    def version(self, items):
        return {'version': items[0].value}

    def tool(self, items):
        vals = [s.strip('"') for s in items]
        return {'tool': vals}

    def name(self, items):
        return {'name': items[0].strip('"')}

    def owl_args(self, items):
        vals = [s.strip('"') for s in items]
        return {'owl_args': vals}

    def start_state(self, items):
        return {'start_state': int(items[0].value)}

    def acc_name(self, items):
        return {'acc_name': " ".join(str(i.value if isinstance(i, Token) else i) for i in items)}

    def acceptance(self, items):
        acc_id = int(items[0].value)
        conds = items[1:]
        return {'acceptance': {"acc_id": acc_id, "conds": conds}}

    def properties(self, items):
        return {'properties': [p.value for p in items]}

    def atomic_prepositions(self, items):
        count = int(items[0].value)  # number of APs
        props = [it.value.strip('"') for it in items[1:]]
        return {'ap_decl': {'count': count, 'propositions': props}}

    def state_count(self, items):
        return {'state_count': int(items[0])}

    def acceptance_cond(self, items):
        return [f"{i}" for i in items]

    def body(self, items):
        return {'states': items}

    def state_chunk(self, items):
        state_info = items[0]
        edges = items[1:]
        return {
            'state': state_info,
            'edges': edges
        }

    def state_name(self, items):
        idx = 0
        label_ = None
        if isinstance(items[idx], dict) and 'label' in items[idx]:
            label_ = items[idx]['label']
            idx += 1
        state_id = int(items[idx].value)
        idx += 1
        name_ = None
        if idx < len(items) and isinstance(items[idx], Token) and items[idx].type == 'ESCAPED_STRING':
            name_ = items[idx].value.strip('"')
            idx += 1
        acc_sig_ = None
        if idx < len(items) and isinstance(items[idx], dict) and 'acc_sig' in items[idx]:
            acc_sig_ = items[idx]['acc_sig']
            idx += 1

        return {
            'label': label_,
            'state_id': state_id,
            'dstring': name_,
            'acc_sig': acc_sig_
        }

    def edge(self, items):
        idx = 0
        label_ = None
        if isinstance(items[idx], dict) and 'label' in items[idx]:
            label_ = items[idx]['label']
            idx += 1

        dest_state = int(items[idx].value)
        idx += 1

        acc_sig_ = None
        if idx < len(items) and isinstance(items[idx], dict) and 'acc_sig' in items[idx]:
            acc_sig_ = items[idx]['acc_sig']

        return {
            'label': label_,
            'destination': dest_state,
            'acc_sig': acc_sig_
        }

    def label(self, items):
        return {'label': items[0]}

    def acc_sig(self, items):
        return {'acc_sig': [int(t.value) for t in items]}

    def label_expr(self, items):
        if len(items) == 1:
            it = items[0]
            return str(it.value if isinstance(it, Token) else it)
        if len(items) == 2:
            op, subexpr = items
            if isinstance(op, Token) and op.type == "LOGIC_NOT":
                return f"!{subexpr}"
            else:
                raise ValueError(f"Unexpected 2-item label_expr: {op=}, {subexpr=}")
        if len(items) == 3:
            # Could be "(" expr ")" or expr op expr
            a, op, b = items
            if isinstance(op, Token) and op.type == "LOGIC_OP":
                return f"({a} {op.value} {b})"
            if isinstance(a, Token) and a.type == "L_PAR" and isinstance(b, Token) and b.type == "R_PAR":
                return f"({op})"
            raise ValueError(f"Unknown 3-item label_expr pattern: {items}")
        raise ValueError(f"Invalid label expression: {items}")

    def boolean(self, items):
        val = items[0].value
        return True if val in ["true", "t"] else False


# Utils

In [114]:
@dataclass
class HOAAutomataTransition:
    destination: int
    acc_sig: List[int] = field(default_factory=list)
    label: Optional[str] = None

    def __str__(self):
        return f"[{self.label}] -> {self.destination} " + "{" + ",".join(map(str, self.acc_sig)) + "}"

In [115]:
@dataclass
class HOAAutomataState:
    state_id: int
    acc_sig: List[int] = field(default_factory=list)
    transitions: List[HOAAutomataTransition] = field(default_factory=list)
    label: Optional[str] = None
    docString: Optional[str] = None

    def __str__(self):
        parts = [f"{self.state_id}"]
        if self.label:
            parts.append(f" ({self.label})")
        if self.docString:
            parts.append(f' "{self.docString}"')
        if self.acc_sig:
            acc_str = ",".join(map(str, self.acc_sig))
            parts.append(f" {{{acc_str}}}")
        header_str = "".join(parts)
        transitions_str_list = []
        for tr in self.transitions:
            transitions_str_list.append("\n\t" + str(tr))

        return header_str + "".join(transitions_str_list)

In [116]:
def build_automata_states(parsed_body: Dict[str, Any]) -> List[HOAAutomataState]:
    result_states: List[HOAAutomataState] = []
    for chunk in parsed_body.get("states", []):
        state_info = chunk["state"]
        state_label = state_info["label"]
        doc_string = state_info["dstring"]
        acc_sig_list = state_info["acc_sig"] if state_info["acc_sig"] is not None else []

        transitions_list: List[HOAAutomataTransition] = []
        for edge_info in chunk["edges"]:
            edge_label = edge_info["label"]
            edge_acc_sig = edge_info["acc_sig"] if edge_info["acc_sig"] is not None else []
            trans = HOAAutomataTransition(
                label=edge_label,
                acc_sig=edge_acc_sig,
                destination=edge_info["destination"]
            )
            transitions_list.append(trans)

        state_obj = HOAAutomataState(
            state_id=state_info["state_id"],
            acc_sig=acc_sig_list,
            transitions=transitions_list,
            label=state_label,
            docString=doc_string
        )
        result_states.append(state_obj)

    return result_states

In [117]:
class HOAParsedHeaderHelper:
    @staticmethod
    def extract_start_state_id(parsed_tree):
        return str(parsed_tree['header']['start_state'].children[0].value)

    @staticmethod
    def extract_accepting_sink_sets_id(parsed_tree):
        return [
            str(ch)
            for ch in parsed_tree['header']['acceptance'].children[1:][0]
        ]

    @staticmethod
    def extract_atomic_propositions_to_symbol(parsed_tree):
        return {
            int(v): str(k)
            for k, v in parsed_tree['header']['ap_decl']['propositions'].items()
        }

    @staticmethod
    def extract_useful_header_info(parsed_tree):
        return {
            'start_state_id': HOAParsedHeaderHelper.extract_start_state_id(parsed_tree),
            'accepting_sink_sets_id': HOAParsedHeaderHelper.extract_accepting_sink_sets_id(parsed_tree),
            'atomic_symbol_to_propositions': HOAParsedHeaderHelper.extract_atomic_propositions_to_symbol(parsed_tree)
        }


In [118]:
class HOAParsedBodyHelper:
    @staticmethod
    def _extract_state_id(parsed_state):
        return parsed_state['state_id'].value

    @staticmethod
    def _label_walk_helper(transition_label):
        """
        Recursively walks through the parsed tree of a transition label
        and converts it into a string representation.
        """
        if isinstance(transition_label, Tree):
            return "".join([HOAParsedBodyHelper._label_walk_helper(child) for child in transition_label.children])
        elif isinstance(transition_label, Token):
            return transition_label.value
        return str(transition_label)

    @staticmethod
    def _extract_acc_sig(acc_sig):
        if acc_sig is None:
            return []
        return [
            ch.value for ch in acc_sig.children
        ]

    @staticmethod
    def extract_transitions(state_transitions):
        return [
            HOAAutomataTransition(
                label=HOAParsedBodyHelper._label_walk_helper(tr['label']),
                destination=tr['destination'],
                accepting_signature=HOAParsedBodyHelper._extract_acc_sig(tr['acc_sig'])
            )
            for tr in state_transitions
        ]

    @staticmethod
    def extract_states(parsed_tree):
        return [
            HOAAutomataState(
                state_id=HOAParsedBodyHelper._extract_state_id(st),
                transitions=HOAParsedBodyHelper.extract_transitions(st['transitions'])
            )
            for st in parsed_tree['body']['states']
        ]


In [119]:
class HOAParser:
    __slots__ = ["parser"]

    def __init__(self):
        self.parser = Lark(hoa_grammar, parser='lalr', transformer=HOA_Transformer())

    def __call__(self, hoa_format_ldba):
        ldba = self.parser.parse(hoa_format_ldba)
        return {
            'header': HOAParsedHeaderHelper.extract_useful_header_info(ldba),
            'states': HOAParsedBodyHelper.extract_states(ldba)
        }


# Test trans-acc

In [120]:
_hoa_input = '''
HOA: v1
tool: "owl ltl2ldgba" "21.0"
name: "Automaton for ((G(F(a))) & (G(F(b))))"
owlArgs: "ltl2ldgba" "-f" "(GF a)&(GF b)"
Start: 0
acc-name: generalized-Buchi 2
Acceptance: 2 Inf(0) & Inf(1)
properties: trans-acc no-univ-branch
properties: deterministic unambiguous
properties: complete
AP: 2 "a" "b"
--BODY--
State: 0
[!0 & !1] 0
[0 & !1] 0 {0}
[!0 & 1] 0 {1}
[0 & 1] 0 {0 1}
--END--
'''

# _parser = HOAParser()
# _parsed_hoa = _parser(_hoa_input)
parser = Lark(hoa_grammar, parser='lalr', transformer=HOA_Transformer())
_parsed_hoa = parser.parse(_hoa_input)

In [121]:
import json
print(json.dumps(_parsed_hoa["header"], indent=2))

{
  "version": "v1",
  "tool": [
    "owl ltl2ldgba",
    "21.0"
  ],
  "name": "Automaton for ((G(F(a))) & (G(F(b))))",
  "owl_args": [
    "ltl2ldgba",
    "-f",
    "(GF a)&(GF b)"
  ],
  "start_state": 0,
  "acc_name": "2",
  "acceptance": {
    "acc_id": 2,
    "conds": [
      [
        "0",
        "1"
      ]
    ]
  },
  "properties": [
    "trans-acc",
    "no-univ-branch",
    "deterministic",
    "unambiguous",
    "complete"
  ],
  "ap_decl": {
    "count": 2,
    "propositions": [
      "a",
      "b"
    ]
  },
  "state_count": null
}


In [122]:
print(_parsed_hoa["body"])

{'states': [{'state': {'label': None, 'state_id': 0, 'dstring': None, 'acc_sig': None}, 'edges': [{'label': '!(0 & !1)', 'destination': 0, 'acc_sig': None}, {'label': '(0 & !1)', 'destination': 0, 'acc_sig': [0]}, {'label': '!(0 & 1)', 'destination': 0, 'acc_sig': [1]}, {'label': '(0 & 1)', 'destination': 0, 'acc_sig': [0, 1]}]}]}


In [123]:
ldba = build_automata_states(_parsed_hoa["body"])
for st in ldba:
    print("-", st)

- 0
	[!(0 & !1)] -> 0 {}
	[(0 & !1)] -> 0 {0}
	[!(0 & 1)] -> 0 {1}
	[(0 & 1)] -> 0 {0,1}


# Test state-acc

In [124]:
_hoa_input = '''
HOA: v1
properties: complete deterministic no-univ-branch state-acc unambiguous
States: 4
Start: 0
AP: 2 "a" "b"
Acceptance: 2 Inf(0) & Inf(1)
acc-name: generalized-Buchi 2
name: "Automaton for ((G(F(a))) & (G(F(b))))"
tool: "owl ltl2ldgba" "21.0"
--BODY--
State: 0
[!0 & !1] 0
[0 & !1] 1
[!0 & 1] 2
[0 & 1] 3
State: 1 {0}
[!0 & !1] 0
[0 & !1] 1
[!0 & 1] 2
[0 & 1] 3
State: 2 {1}
[!0 & !1] 0
[0 & !1] 1
[!0 & 1] 2
[0 & 1] 3
State: 3 {0 1}
[!0 & !1] 0
[0 & !1] 1
[!0 & 1] 2
[0 & 1] 3
--END--
'''

# _parser = HOAParser()
# _parsed_hoa = _parser(_hoa_input)
parser = Lark(hoa_grammar, parser='lalr', transformer=HOA_Transformer())
_parsed_hoa = parser.parse(_hoa_input)

In [125]:
import json
print(json.dumps(_parsed_hoa["header"], indent=2))

{
  "version": "v1",
  "tool": [
    "owl ltl2ldgba",
    "21.0"
  ],
  "name": "Automaton for ((G(F(a))) & (G(F(b))))",
  "owl_args": null,
  "start_state": 0,
  "acc_name": "2",
  "acceptance": {
    "acc_id": 2,
    "conds": [
      [
        "0",
        "1"
      ]
    ]
  },
  "properties": [
    "complete",
    "deterministic",
    "no-univ-branch",
    "state-acc",
    "unambiguous"
  ],
  "ap_decl": {
    "count": 2,
    "propositions": [
      "a",
      "b"
    ]
  },
  "state_count": 4
}


In [126]:
print(_parsed_hoa["body"])

{'states': [{'state': {'label': None, 'state_id': 0, 'dstring': None, 'acc_sig': None}, 'edges': [{'label': '!(0 & !1)', 'destination': 0, 'acc_sig': None}, {'label': '(0 & !1)', 'destination': 1, 'acc_sig': None}, {'label': '!(0 & 1)', 'destination': 2, 'acc_sig': None}, {'label': '(0 & 1)', 'destination': 3, 'acc_sig': None}]}, {'state': {'label': None, 'state_id': 1, 'dstring': None, 'acc_sig': [0]}, 'edges': [{'label': '!(0 & !1)', 'destination': 0, 'acc_sig': None}, {'label': '(0 & !1)', 'destination': 1, 'acc_sig': None}, {'label': '!(0 & 1)', 'destination': 2, 'acc_sig': None}, {'label': '(0 & 1)', 'destination': 3, 'acc_sig': None}]}, {'state': {'label': None, 'state_id': 2, 'dstring': None, 'acc_sig': [1]}, 'edges': [{'label': '!(0 & !1)', 'destination': 0, 'acc_sig': None}, {'label': '(0 & !1)', 'destination': 1, 'acc_sig': None}, {'label': '!(0 & 1)', 'destination': 2, 'acc_sig': None}, {'label': '(0 & 1)', 'destination': 3, 'acc_sig': None}]}, {'state': {'label': None, 'sta

In [127]:
ldba = build_automata_states(_parsed_hoa["body"])
for st in ldba:
    print("-", st)

- 0
	[!(0 & !1)] -> 0 {}
	[(0 & !1)] -> 1 {}
	[!(0 & 1)] -> 2 {}
	[(0 & 1)] -> 3 {}
- 1 {0}
	[!(0 & !1)] -> 0 {}
	[(0 & !1)] -> 1 {}
	[!(0 & 1)] -> 2 {}
	[(0 & 1)] -> 3 {}
- 2 {1}
	[!(0 & !1)] -> 0 {}
	[(0 & !1)] -> 1 {}
	[!(0 & 1)] -> 2 {}
	[(0 & 1)] -> 3 {}
- 3 {0,1}
	[!(0 & !1)] -> 0 {}
	[(0 & !1)] -> 1 {}
	[!(0 & 1)] -> 2 {}
	[(0 & 1)] -> 3 {}
