# Алгоритм преобразования DFA в regex

Пусть $\mathcal{M} = (Q = \{Q_0, ..., Q_n \}, \Sigma, \delta, Q_0, F)$ — детерминированный конечный автомат, $\mathcal{K}$ — алгебра регулярных выражений над алфавитом $\Sigma$ сигнатуры $\langle \varepsilon, \varnothing, +, \cdot, ^* \rangle$.\
Рассмотрим следующую систему

$$
\begin{cases}
    \begin{gather*}
    Q_0 = \alpha_{00}Q_0 + ... + \alpha_{0n}Q_n + \beta_0, \\
    ... \tag{1}\\
    Q_n = \alpha_{n0}Q_0 + ... + \alpha_{nn}Q_n + \beta_n; \\
    \end{gather*}
\end{cases}
$$

где $\alpha_{ij} = \varnothing$, если $\nexists a \in \Sigma: \delta(Q_i, a) = Q_j$, иначе $\alpha_{ij} = a_1 + ... + a_k$ — сумма всех символов $a \in \Sigma$, для которых $\delta(Q_i, a) = Q_j$ (в частности, один символ, если такой переход единственный), а $\beta_i = \begin{cases}\varepsilon, \; Q_i \in F, \\ \varnothing, \; Q_i \notin F. \end{cases}$

**Утверждение**\
$(\gamma_0, ..., \gamma_n)$ — наименьшее решение системы $(1)$ $\Rightarrow$ $L(\gamma_0) = L(\mathcal{M}).$

Перевод DFA в regex реализован в методе to_regex класса DFA.

## Парсер

Синтаксис входных данных немного расширен: алфавит $\Sigma$ может содержать цифры, а индекс состояния может быть любым натуральным числом.

Разбор DFA происходит в методе parse класса DFA.

Регулярные выражения входных данных импортируются из файла [rules.py](https://github.com/SmEgDm/tfl/tree/main/labs/lab2/dfa_to_regex/rules.py).

## Класс DFA

In [1]:
import sys
sys.path.append('../regex_system_solver/')

import re
import Regex
import Equation
from graphviz import Digraph
import rules

# Custom rule for parsing a single regular-expression symbol
def parse_symbol(tokens):
    """Consume one symbol token and wrap it in a constant ``Regex`` node.

    A symbol is a single lowercase Latin letter, a digit, ``ε`` or ``Ø``.

    Args:
        tokens: mutable list of tokens; the first token is removed when it
            is a symbol.

    Returns:
        A ``Regex.Regex`` built with ``Regex.Operation.CONST`` for the
        consumed token, or ``None`` (leaving ``tokens`` untouched) when the
        list is empty or its first token is not a symbol.
    """
    # Guard against an exhausted token stream, mirroring parse_var.
    if tokens and re.fullmatch('[a-z0-9εØ]', tokens[0]):
        return Regex.Regex(Regex.Operation.CONST, tokens.pop(0))
    return None

# Custom rule for parsing variable names
def parse_var(tokens):
    """Consume a variable name: one capital Latin letter plus subscript digits.

    Args:
        tokens: mutable list of tokens; every token that becomes part of the
            name is removed from its front.

    Returns:
        The concatenated name (letter followed by any directly following
        tokens in the range U+2080..U+2089), or ``None`` when the list is
        empty or does not start with a capital letter.
    """
    if len(tokens) == 0 or re.fullmatch(r'[A-Z]', tokens[0]) is None:
        return None
    name = tokens.pop(0)
    # Greedily absorb the subscript index, one digit token at a time.
    while len(tokens) != 0 and re.fullmatch(r'[\u2080-\u2089]', tokens[0]):
        name += tokens.pop(0)
    return name

# Hand the notebook-specific parsing rules and constant symbols over to the
# external solver modules (presumably they replace the solver's defaults —
# confirm against Regex / Equation).
# Symbol rule: lowercase letters, digits, 'ε' and 'Ø'.
Regex.Regex.set_parse_symbol(parse_symbol)
# 'ε' is passed as the "one" constant and 'Ø' as the "zero" constant.
Regex.Regex.set_one('ε')
Regex.Regex.set_zero('Ø')
# Variable rule: a capital letter followed by Unicode subscript digits.
Equation.Equation.set_parse_var(parse_var)

class DFA:
    """Deterministic finite automaton that can be parsed, drawn and converted to a regex.

    Attributes (stored exactly as passed to the constructor):
        states: collection of state names.
        start_state: name of the initial state.
        finite_states: collection of accepting state names.
        transitions: iterable of ``(source, letter, target)`` triples.
    """

    def __init__(self, states, start_state, finite_states, transitions):
        self.states = states
        self.start_state = start_state
        self.finite_states = finite_states
        self.transitions = transitions

    @staticmethod
    def _to_unicode(state):
        """Return the state name with its index digits turned into Unicode subscripts.

        Surrounding whitespace is stripped; the first character is kept as
        the state letter and every following character must be a decimal
        digit (``int`` raises ``ValueError`` otherwise).
        """
        state = state.strip()
        index = ''.join(chr(ord('\u2080') + int(digit)) for digit in state[1:])
        return f'{state[0]}{index}'

    @staticmethod
    def parse(text):
        """Build a ``DFA`` from its textual description.

        The first state found in ``text`` becomes the start state.

        Args:
            text: description that must fully match ``rules.dfa``.

        Returns:
            The parsed ``DFA``; its ``states`` contain the start state, the
            accepting states and every state mentioned in a transition.

        Raises:
            ValueError: if ``text`` does not match ``rules.dfa``.
        """
        if not re.fullmatch(rules.dfa, text):
            raise ValueError('Incorrect DFA')
        start_state = DFA._to_unicode(re.search(rules.state, text).group(0))
        # The start state belongs to the automaton even if no transition mentions it.
        states = {start_state}
        transitions = []
        # Loop-invariant pattern that splits one transition into its three parts.
        transition_pattern = f'<({rules.state}),({rules.letter}),({rules.state})>'
        for transition_text in re.findall(rules.transition, text):
            parts = re.findall(transition_pattern, transition_text)[0]
            source = DFA._to_unicode(parts[0])
            target = DFA._to_unicode(parts[2])
            transitions.append((source, parts[1].strip(), target))
            states.add(source)
            states.add(target)
        finite_states_text = re.search(rules.finite_states, text).group(0)
        finite_states = {
            DFA._to_unicode(finite_state)
            for finite_state in re.findall(rules.state, finite_states_text)
        }
        # Accepting states without transitions are still states of the automaton.
        states |= finite_states
        return DFA(states, start_state, finite_states, transitions)

    def to_graph(self):
        """Return a left-to-right graphviz ``Digraph`` (SVG format) of the automaton.

        An invisible unnamed node with an edge to the start state marks the
        entry point; accepting states are drawn as double circles.
        """
        graph = Digraph(graph_attr={'rankdir': 'LR'}, format='svg')
        graph.node('', shape='none')
        graph.node(self.start_state, shape='circle')
        graph.edge('', self.start_state)
        for state, letter, other_state in self.transitions:
            graph.node(state, shape='circle')
            graph.node(other_state, shape='circle')
            graph.edge(state, other_state, letter)
        # Declared last so the double circle overrides the plain circle above.
        for finite_state in self.finite_states:
            graph.node(finite_state, shape='doublecircle')
        return graph

    def _equation_text(self, state):
        """Return the source text of the equation ``state=...`` for one state."""
        terms = [
            letter + target
            for source, letter, target in self.transitions
            if source == state
        ]
        if state in self.finite_states:
            terms.append('ε')
        # A non-accepting state without outgoing transitions denotes the empty
        # language; write the zero constant instead of an empty right-hand side.
        right_side = '+'.join(terms) if terms else 'Ø'
        return f'{state}={right_side}'

    def to_regex(self):
        """Return a regular expression for the language accepted by the automaton.

        One equation per state is built and passed to
        ``Equation.solve_system``; the result is the right-hand side of the
        start state's equation after solving.
        """
        # Sorted for a reproducible equation order (set iteration order of
        # strings is not stable between runs); the start state is always
        # included so its equation exists.
        ordered_states = sorted(set(self.states) | {self.start_state})
        system = [
            Equation.Equation.parse(self._equation_text(state))
            for state in ordered_states
        ]
        start_equation = system[ordered_states.index(self.start_state)]
        Equation.solve_system(system)
        return str(start_equation).split('=', 1)[1].strip()

## Тесты

In [2]:
import os
from ipywidgets import Tab, HTML

tests_count = len(os.listdir('tests'))
# Raw-content URLs have the form /<owner>/<repo>/<ref>/<path>; a "tree"
# segment belongs to github.com page URLs only and makes the images 404.
graphs_path = 'https://raw.githubusercontent.com/SmEgDm/tfl/main/labs/lab2/dfa_to_regex/graphs'


def output_pattern(input_text, result):
    """Return an HTML widget with an "Input" and a "Result" section."""
    return HTML(
        f'''
    <div>
        <b>Input</b><br>
        {input_text}<br><br>
        <b>Result</b><br>
        {result}
    </div>
    '''
    )


def _escape_html(value):
    """Escape the characters that would otherwise be read as HTML markup."""
    return str(value).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')


# One widget per test file: the rendered graph and the resulting regex, or
# the raw input and the error message when the DFA cannot be processed.
children = []
for i in range(tests_count):
    # Read outside the try block so that `text` is always the current file's
    # content when the error branch displays it.
    with open(f'tests/test_{i}.txt') as f:
        text = f.read()
    try:
        dfa = DFA.parse(text)
        dfa.to_graph().render(filename=f'graphs/graph_{i + 1}')
        svg = f'<img src=\'{graphs_path}/graph_{i + 1}.svg\' alt="graph_{i + 1}">'
        child = output_pattern(svg, _escape_html(dfa.to_regex()))
    except Exception as err:
        # Deliberately broad: a failing test is shown in its tab instead of
        # aborting the whole cell.
        child = output_pattern(_escape_html(text), f'<b>{_escape_html(err)}</b>')
    children.append(child)

tab = Tab()
# Children must be assigned before the titles: a title can only be set for
# an index that already has a child.
tab.children = children
for i in range(tests_count):
    tab.set_title(i, f'Test {i + 1}')

tab

Tab(children=(HTML(value="\n    <div>\n        <b>Input</b><br>\n        <img src='graphs/graph_1.svg' alt=gra…

False