Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rsa rework #1

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 41 additions & 43 deletions pyformlang/rsa/box.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pyformlang.finite_automaton.epsilon_nfa import EpsilonNFA
from pyformlang.finite_automaton.finite_automaton import to_symbol
from pyformlang.finite_automaton.symbol import Symbol
import networkx as nx


class Box:
Expand All @@ -16,56 +17,57 @@ class Box:
----------
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
A epsilon nfa
label : :class:`~pyformlang.finite_automaton.Symbol`
A label for epsilon nfa
nonterminal : :class:`~pyformlang.finite_automaton.Symbol`
A nonterminal for epsilon nfa

"""

def __init__(self, enfa: EpsilonNFA = None, label: Symbol = None):
if enfa is not None:
enfa = enfa.minimize()
self._dfa = enfa or EpsilonNFA()

if label is not None:
label = to_symbol(label)
self._label = label or Symbol("")

def change_label(self, label: Symbol):
""" Set a new label

Parameters
-----------
label : :class:`~pyformlang.finite_automaton.Symbol`
The new label for automaton

"""
self._label = to_symbol(label)

def change_dfa(self, enfa: EpsilonNFA):
""" Set an epsilon finite automaton

Parameters
-----------
enfa : :class:`~pyformlang.finite_automaton.EpsilonNFA`
The new epsilon finite automaton

"""
enfa = enfa.minimize()
def __init__(self, enfa: EpsilonNFA, nonterminal: Symbol | str):
self._dfa = enfa

nonterminal = to_symbol(nonterminal)
self._nonterminal = nonterminal

def to_subgraph_dot(self):
    """ Render this box as a DOT ``subgraph cluster`` block

    The box's automaton is converted with ``to_networkx``.  Every node
    carrying both the ``is_start`` and ``is_final`` attributes is
    considered a real state: start states are filled green and final
    states are drawn as double circles.  Nodes lacking either attribute
    are removed from the graph before the edges are written, so no
    emitted edge touches them.

    Returns
    ----------
    dot_string : str
        The DOT source of the subgraph, labelled with the box's \
        nonterminal
    """
    graph = self._dfa.to_networkx()
    lines = [
        f'subgraph cluster_{self._nonterminal}',
        f'{{ label="{self._nonterminal}"',
        'fontname="Helvetica,Arial,sans-serif"',
        'node [fontname="Helvetica,Arial,sans-serif"]',
        'edge [fontname="Helvetica,Arial,sans-serif"]',
        'rankdir=LR;',
        'node [shape = circle style=filled fillcolor=white]',
    ]

    # Nodes without the state attributes are collected first and removed
    # after the loop, because the graph must not change while iterating.
    # NOTE(review): presumably these are drawing-only helper nodes added
    # by to_networkx -- confirm against that method.
    strange_nodes = []
    for node, data in graph.nodes(data=True):
        if 'is_start' not in data or 'is_final' not in data:
            strange_nodes.append(node)
            continue
        # str() keeps non-string node identifiers (e.g. integers) from
        # failing on .replace; ";" is stripped from the emitted name.
        node_id = str(node).replace(";", "")
        if data['is_start']:
            lines.append(f'{node_id} [fillcolor = green];')
        if data['is_final']:
            lines.append(f'{node_id} [shape = doublecircle];')

    for strange_node in strange_nodes:
        graph.remove_node(strange_node)

    for node_from, node_to, data in graph.edges(data=True):
        source = str(node_from).replace(";", "")
        target = str(node_to).replace(";", "")
        label = data['label']
        lines.append(f'{source} -> {target} [label = "{label}"];')

    lines.append("}")
    return "\n".join(lines)

@property
def dfa(self):
""" Box's dfa """
return self._dfa

@property
def label(self):
""" Box's label """
return self._label
def nonterminal(self):
""" Box's nonterminal """
return self._nonterminal

@property
def start_state(self):
""" The start state """
def start_states(self):
""" The start states """
return self._dfa.start_states

@property
Expand All @@ -90,14 +92,10 @@ def is_equivalent_to(self, other):
if not isinstance(other, Box):
return False

if self._dfa.is_equivalent_to(other.dfa) and \
self._label == other.label:
return True

return False
return self._dfa.is_equivalent_to(other.dfa) and self.nonterminal == other.nonterminal

def __eq__(self, other):
return self.is_equivalent_to(other)

def __hash__(self):
return self._label.__hash__()
return self._nonterminal.__hash__()
156 changes: 56 additions & 100 deletions pyformlang/rsa/recursive_automaton.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from typing import AbstractSet

import pydot

from pyformlang.finite_automaton.finite_automaton import to_symbol
from pyformlang.finite_automaton.symbol import Symbol
from pyformlang.regular_expression import Regex
Expand All @@ -19,89 +21,49 @@ class RecursiveAutomaton:

Parameters
----------
labels : set of :class:`~pyformlang.finite_automaton.Symbol`, optional
A finite set of labels for boxes
initial_label : :class:`~pyformlang.finite_automaton.Symbol`, optional
A start label for automaton
boxes : set of :class:`~pyformlang.rsa.Box`, optional
start_box : :class:`~pyformlang.rsa.Box`
Start box
boxes : set of :class:`~pyformlang.rsa.Box`
A finite set of boxes

"""

def __init__(self,
labels: AbstractSet[Symbol] = None,
initial_label: Symbol = None,
boxes: AbstractSet[Box] = None):

if labels is not None:
labels = {to_symbol(x) for x in labels}
self._labels = labels or set()

if initial_label is not None:
initial_label = to_symbol(initial_label)
if initial_label not in self._labels:
self._labels.add(initial_label)
self._initial_label = initial_label or Symbol("")

start_box: Box,
boxes: AbstractSet[Box]):
self._boxes = {}
if boxes is not None:
for box in boxes:
self._boxes.update({to_symbol(box.label): box})
self._labels.add(box.label)
if start_box not in boxes:
self._boxes.update({to_symbol(start_box.nonterminal): start_box})
self._start_nonterminal = to_symbol(start_box.nonterminal)
for box in boxes:
self._boxes.update({to_symbol(box.nonterminal): box})

for label in self._labels:
box = self.get_box(label)
if box is None:
raise ValueError(
"RSA must have the same number of labels and DFAs")
def get_box_by_nonterminal(self, nonterminal: Symbol | str):
""" Box by nonterminal """

def get_box(self, label: Symbol):
""" Box by label """

label = to_symbol(label)
if label in self._boxes:
return self._boxes[label]
nonterminal = to_symbol(nonterminal)
if nonterminal in self._boxes:
return self._boxes[nonterminal]

return None

def add_box(self, new_box: Box):
""" Set a box

Parameters
-----------
new_box : :class:`~pyformlang.rsa.Box`
The new box

"""

self._boxes.update({new_box.label: new_box})
self._labels.add(to_symbol(new_box.label))

def get_number_of_boxes(self):
""" Size of set of boxes """

return len(self._boxes)

def change_initial_label(self, new_initial_label: Symbol):
""" Set an initial label

Parameters
-----------
new_initial_label : :class:`~pyformlang.finite_automaton.Symbol`
The new initial label

"""

new_initial_label = to_symbol(new_initial_label)
if new_initial_label not in self._labels:
raise ValueError(
"New initial label not in set of labels for boxes")
def to_dot(self):
    """ Render the recursive automaton as a DOT digraph

    Every box contributes the text returned by its ``to_subgraph_dot``
    method; these pieces are wrapped, one per line, in a single
    ``digraph`` with an empty name.

    Returns
    ----------
    dot_string : str
        The DOT source describing all boxes
    """
    clusters = [box.to_subgraph_dot() for box in self._boxes.values()]
    return "\n".join(['digraph ""{', *clusters, "}"])

@property
def labels(self) -> set:
""" The set of labels """
def nonterminals(self) -> set:
""" The set of nonterminals """

return self._labels
return set(self._boxes.keys())

@property
def boxes(self) -> dict:
Expand All @@ -110,52 +72,57 @@ def boxes(self) -> dict:
return self._boxes

@property
def initial_label(self) -> Symbol:
""" The initial label """
def start_nonterminal(self) -> Symbol:
""" The start nonterminal """

return self._start_nonterminal

@property
def start_box(self):
""" The start box """

return self._initial_label
return self.boxes[self.start_nonterminal]

@classmethod
def from_regex(cls, regex: Regex, initial_label: Symbol):
def from_regex(cls, regex: Regex, start_nonterminal: Symbol | str):
""" Create a recursive automaton from regular expression

Parameters
-----------
regex : :class:`~pyformlang.regular_expression.Regex`
The regular expression
initial_label : :class:`~pyformlang.finite_automaton.Symbol`
The initial label for the recursive automaton
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str
The start nonterminal for the recursive automaton

Returns
-----------
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
The new recursive automaton built from regular expression
"""

initial_label = to_symbol(initial_label)
box = Box(regex.to_epsilon_nfa().minimize(), initial_label)
return RecursiveAutomaton({initial_label}, initial_label, {box})
start_nonterminal = to_symbol(start_nonterminal)
box = Box(regex.to_epsilon_nfa().minimize(), start_nonterminal)
return RecursiveAutomaton(box, {box})

@classmethod
def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
""" Create a recursive automaton from text
def from_ebnf(cls, text, start_nonterminal: Symbol | str = Symbol("S")):
""" Create a recursive automaton from ebnf

Parameters
-----------
text : str
The text of transform
start_symbol : str, optional
The start symbol, S by default
start_nonterminal : :class:`~pyformlang.finite_automaton.Symbol` | str, optional
The start nonterminal, S by default

Returns
-----------
rsa : :class:`~pyformlang.rsa.RecursiveAutomaton`
The new recursive automaton built from context-free grammar
"""

start_nonterminal = to_symbol(start_nonterminal)
productions = {}
boxes = set()
labels = set()
nonterminals = set()
for production in text.splitlines():
production = production.strip()
if "->" not in production:
Expand All @@ -164,7 +131,7 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
head, body = production.split("->")
head = head.strip()
body = body.strip()
labels.add(to_symbol(head))
nonterminals.add(to_symbol(head))

if body == "":
body = Epsilon().to_text()
Expand All @@ -177,11 +144,12 @@ def from_text(cls, text, start_symbol: Symbol = Symbol("S")):
for head, body in productions.items():
boxes.add(Box(Regex(body).to_epsilon_nfa().minimize(),
to_symbol(head)))
start_box = Box(Regex(productions[start_nonterminal.value]).to_epsilon_nfa().minimize(), start_nonterminal)
return RecursiveAutomaton(start_box, boxes)

return RecursiveAutomaton(labels, start_symbol, boxes)

def is_equivalent_to(self, other):
""" Check whether two recursive automata are equivalent
# Not equivalence in the formal-language-theory sense: only the nonterminal-to-box mappings are compared.
def is_equals_to(self, other):
""" Check whether two recursive automata are equals by boxes

Parameters
----------
Expand All @@ -191,23 +159,11 @@ def is_equivalent_to(self, other):
Returns
----------
are_equivalent : bool
Whether the two recursive automata are equivalent or not
Whether the two recursive automata are equals or not
"""

if not isinstance(other, RecursiveAutomaton):
return False

if self._labels != other._labels:
return False

for label in self._labels:
box_1 = self.get_box(label)
box_2 = other.get_box(label)

if box_1 != box_2:
return False

return True
return self.boxes == other.boxes

def __eq__(self, other):
return self.is_equivalent_to(other)
return self.is_equals_to(other)
Loading