Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework RSA #20

Merged
merged 8 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 40 additions & 43 deletions pyformlang/rsa/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,56 +16,57 @@ class Box:
----------
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
A epsilon nfa
label : :class:`~pyformlang.finite_automaton.Symbol`
A label for epsilon nfa
nonterminal : :class:`~pyformlang.finite_automaton.Symbol`
A nonterminal for epsilon nfa

"""

def __init__(self, enfa: EpsilonNFA = None, label: Symbol = None):
if enfa is not None:
enfa = enfa.minimize()
self._dfa = enfa or EpsilonNFA()

if label is not None:
label = to_symbol(label)
self._label = label or Symbol("")

def change_label(self, label: Symbol):
""" Set a new label

Parameters
-----------
label : :class:`~pyformlang.finite_automaton.Symbol`
The new label for automaton

"""
self._label = to_symbol(label)

def change_dfa(self, enfa: EpsilonNFA):
""" Set an epsilon finite automaton

Parameters
-----------
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
The new epsilon finite automaton

"""
enfa = enfa.minimize()
def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str):
KubEF marked this conversation as resolved.
Show resolved Hide resolved
self._dfa = enfa

nonterminal = to_symbol(nonterminal)
self._nonterminal = nonterminal

def to_subgraph_dot(self):
    """Creates a named subgraph representing a box

    The box's automaton is exported with ``to_networkx()``. Every node that
    carries both the ``is_start`` and ``is_final`` attributes is drawn
    (start nodes filled green, final nodes as double circles). Nodes lacking
    either attribute, and every edge touching such a node, are left out.

    Returns
    ----------
    dot_string : str
        A ``subgraph cluster_<nonterminal> { ... }`` block in DOT syntax
    """
    graph = self._dfa.to_networkx()

    def clean(node):
        # Quotes would end the quoted DOT identifier early, so strip them.
        # str() lets non-string node keys (e.g. integers) through as well.
        return str(node).replace('"', '').replace("'", "")

    lines = [
        f'subgraph cluster_{self._nonterminal}',
        f'{{ label="{self._nonterminal}"',
        'fontname="Helvetica,Arial,sans-serif"',
        'node [fontname="Helvetica,Arial,sans-serif"]',
        'edge [fontname="Helvetica,Arial,sans-serif"]',
        'rankdir=LR;',
        'node [shape = circle style=filled fillcolor=white]',
    ]

    # Remember skipped nodes under their ORIGINAL key: the sanitized name
    # may differ from the key stored in the graph.
    # NOTE(review): these are presumably the start-marker helper nodes that
    # to_networkx adds -- confirm against the exporter.
    skipped = set()
    for node, data in graph.nodes(data=True):
        if 'is_start' not in data or 'is_final' not in data:
            skipped.add(node)
            continue
        name = clean(node)
        if data['is_start']:
            lines.append(f'"{name}" [fillcolor = green];')
        if data['is_final']:
            lines.append(f'"{name}" [shape = doublecircle];')

    for node_from, node_to, data in graph.edges(data=True):
        # Same effect as removing the skipped nodes from the graph first.
        if node_from in skipped or node_to in skipped:
            continue
        label = data['label']
        lines.append(
            f'"{clean(node_from)}" -> "{clean(node_to)}" [label = "{label}"];')

    lines.append("}")
    return "\n".join(lines)
@property
def dfa(self):
    """ The automaton stored in this box """
    automaton = self._dfa
    return automaton

@property
def label(self):
""" Box's label """
return self._label
def nonterminal(self):
""" Box's nonterminal """
return self._nonterminal

@property
def start_state(self):
""" The start state """
def start_states(self):
""" The start states """
return self._dfa.start_states

@property
Expand All @@ -90,14 +91,10 @@ def is_equivalent_to(self, other):
if not isinstance(other, Box):
return False

if self._dfa.is_equivalent_to(other.dfa) and \
self._label == other.label:
return True

return False
return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal

def __eq__(self, other):
return self.is_equivalent_to(other)

def __hash__(self):
return self._label.__hash__()
return self._nonterminal.__hash__()
175 changes: 72 additions & 103 deletions pyformlang/rsa/recursive_automaton.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from typing import AbstractSet


from pyformlang.finite_automaton.finite_automaton import to_symbol
from pyformlang.finite_automaton.symbol import Symbol
from pyformlang.regular_expression import Regex
Expand All @@ -19,143 +20,121 @@ class RecursiveAutomaton:

Parameters
----------
labels : set of :class:`~pyformlang.finite_automaton.Symbol`, optional
A finite set of labels for boxes
initial_label : :class:`~pyformlang.finite_automaton.Symbol`, optional
A start label for automaton
boxes : set of :class:`~pyformlang.rsa.Box`, optional
start_box : :class:`~pyformlang.rsa.Box`
Start box
boxes : set of :class:`~pyformlang.rsa.Box`
A finite set of boxes

"""

def __init__(self,
labels: AbstractSet[Symbol] = None,
initial_label: Symbol = None,
boxes: AbstractSet[Box] = None):

if labels is not None:
labels = {to_symbol(x) for x in labels}
self._labels = labels or set()

if initial_label is not None:
initial_label = to_symbol(initial_label)
if initial_label not in self._labels:
self._labels.add(initial_label)
self._initial_label = initial_label or Symbol("")

self._boxes = {}
if boxes is not None:
for box in boxes:
self._boxes.update({to_symbol(box.label): box})
self._labels.add(box.label)

for label in self._labels:
box = self.get_box(label)
if box is None:
raise ValueError(
"RSA must have the same number of labels and DFAs")

def get_box(self, label: Symbol):
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Write doc with parameters.

""" Box by label """

label = to_symbol(label)
if label in self._boxes:
return self._boxes[label]

return None

def add_box(self, new_box: Box):
""" Set a box
start_box: Box,
boxes: AbstractSet[Box]):
self._nonterminal_to_box = {}
if start_box not in boxes:
self._nonterminal_to_box[to_symbol(start_box.nonterminal)] = start_box
self._start_nonterminal = to_symbol(start_box.nonterminal)
for box in boxes:
self._nonterminal_to_box[to_symbol(box.nonterminal)] = box

def get_box_by_nonterminal(self, nonterminal: Symbol | str):
KubEF marked this conversation as resolved.
Show resolved Hide resolved
"""
Box by nonterminal

Parameters
-----------
new_box : :class:`~pyformlang.rsa.Box`
The new box
----------
nonterminal: :class:`~pyformlang.finite_automaton.Symbol` | str
the nonterminal of which represents a box

Returns
-----------
box : :class:`~pyformlang.rsa.Box` | None
box represented by given nonterminal
"""

self._boxes.update({new_box.label: new_box})
self._labels.add(to_symbol(new_box.label))

def get_number_of_boxes(self):
""" Size of set of boxes """

return len(self._boxes)
nonterminal = to_symbol(nonterminal)
if nonterminal in self._nonterminal_to_box:
return self._nonterminal_to_box[nonterminal]

def change_initial_label(self, new_initial_label: Symbol):
""" Set an initial label
return None

Parameters
-----------
new_initial_label : :class:`~pyformlang.finite_automaton.Symbol`
The new initial label
def get_number_boxes(self):
""" Size of set of boxes """

"""
return len(self._nonterminal_to_box)

new_initial_label = to_symbol(new_initial_label)
if new_initial_label not in self._labels:
raise ValueError(
"New initial label not in set of labels for boxes")
def to_dot(self):
    """ Create dot representation of recursive automaton

    Returns
    ----------
    dot_string : str
        An unnamed ``digraph`` holding one subgraph per box, in the
        iteration order of the nonterminal-to-box mapping
    """
    clusters = (box.to_subgraph_dot()
                for box in self._nonterminal_to_box.values())
    return "\n".join(['digraph "" {', *clusters, "}"])

@property
def labels(self) -> set:
""" The set of labels """
def nonterminals(self) -> set:
""" The set of nonterminals """

return self._labels
return set(self._nonterminal_to_box.keys())

@property
def boxes(self) -> dict:
""" The set of boxes """

return self._boxes
return self._nonterminal_to_box

@property
def start_nonterminal(self) -> Symbol:
    """ The nonterminal of the start box """
    nonterminal = self._start_nonterminal
    return nonterminal

@property
def initial_label(self) -> Symbol:
""" The initial label """
def start_box(self):
""" The start box """

return self._initial_label
return self.boxes[self.start_nonterminal]

@classmethod
def from_regex(cls, regex: Regex, initial_label: Symbol):
def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str):
""" Create a recursive automaton from regular expression

Parameters
-----------
regex : :class:`~pyformlang.regular_expression.Regex`
The regular expression
initial_label : :class:`~pyformlang.finite_automaton.Symbol`
The initial label for the recursive automaton
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str
The start nonterminal for the recursive automaton

Returns
-----------
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
The new recursive automaton built from regular expression
"""

initial_label = to_symbol(initial_label)
box = Box(regex.to_epsilon_nfa().minimize(), initial_label)
return RecursiveAutomaton({initial_label}, initial_label, {box})
start_nonterminal = to_symbol(start_nonterminal)
box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal)
return RecursiveAutomaton(box, {box})

@classmethod
def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
""" Create a recursive automaton from text
def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")):
""" Create a recursive automaton from ebnf (ebnf = Extended Backus-Naur Form)

Parameters
-----------
text : str
The text of transform
start_symbol : str, optional
The start symbol, S by default
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional
The start nonterminal, S by default

Returns
-----------
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
The new recursive automaton built from context-free grammar
"""

start_nonterminal = to_symbol(start_nonterminal)
productions = {}
boxes = set()
labels = set()
nonterminals = set()
for production in text.splitlines():
production = production.strip()
if "->" not in production:
Expand All @@ -164,7 +143,7 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
head, body = production.split("->")
head = head.strip()
body = body.strip()
labels.add(to_symbol(head))
nonterminals.add(to_symbol(head))

if body == "":
body = Epsilon().to_text()
Expand All @@ -177,11 +156,13 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
for head, body in productions.items():
boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(),
to_symbol(head)))
start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal)
return RecursiveAutomaton(start_box, boxes)

return RecursiveAutomaton(labels, start_symbol, boxes)

def is_equivalent_to(self, other):
""" Check whether two recursive automata are equivalent
def is_equals_to(self, other):
"""
Check whether two recursive automata are equals by boxes.
Not equivalency in terms of formal languages theory, just mapping boxes

Parameters
----------
Expand All @@ -191,23 +172,11 @@ def is_equivalent_to(self, other):
Returns
----------
are_equivalent : bool
Whether the two recursive automata are equivalent or not
Whether the two recursive automata are equals or not
"""

if not isinstance(other, RecursiveAutomaton):
return False

if self._labels != other._labels:
return False

for label in self._labels:
box_1 = self.get_box(label)
box_2 = other.get_box(label)

if box_1 != box_2:
return False

return True
return self.boxes == other.boxes

def __eq__(self, other):
return self.is_equivalent_to(other)
return self.is_equals_to(other)
Loading
Loading