-
Notifications
You must be signed in to change notification settings - Fork 0
/
Regex2DFA.py
121 lines (101 loc) · 3 KB
/
Regex2DFA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from SyntaxTree import *
from Automata import *
def create_token_queue(INPUT):
'''
Process the input and converts it to a list containing the regex elements and alphabets.
Args:
INPUT: string, containing the input
Returns:
list, containing the regex elements and alphabets.
'''
tokens = []
id = ''
for c in INPUT:
if c in ['(', ')', '.', '*', '+']:
if id != '':
tokens.append(id)
id = ''
tokens.append(c)
else:
id = id + c
if id != '':
tokens.append(id)
return tokens
def create_postfix_token_queue(tokens):
'''
Creates the postfix representation of the regex (stored in a list). This postfix representation is later used to create the Syntax Tree.
Args:
tokens: list, containing the regex elements and alphabets
Returns:
list, containing the regex elements and alphabets in a postfix manner.
'''
output_queue = []
stack = []
for token in tokens:
if token == '(':
stack.append('(')
elif token == ')':
while (len(stack) > 0 and stack[-1] != '('):
output_queue.append(stack.pop())
stack.pop()
elif token == '*':
stack.append(token)
elif token == '.':
while len(stack) > 0 and stack[-1] == '*':
output_queue.append(stack.pop())
stack.append(token)
elif token == '+':
while len(stack) > 0 and (stack[-1] == '*' or stack[-1] == '.'):
output_queue.append(stack.pop())
stack.append(token)
else:
output_queue.append(token)
while (len(stack) > 0):
output_queue.append(stack.pop())
return output_queue
def read_input(path):
'''
Reads in the input which should be in the following format:
<N, number of alphabets>
<alphabet 1>
<alphabet 2>
<alphabet ...>
<alphabet N>
<REGEX>
for more detail on the input please refer to InOut_Formatting.md
Args:
path: string, the path to the input file
Returns:
list, containing the alphabets
string, containing the Regex
'''
alph = []
file = open(path)
lines = file.readlines()
file.close()
for i in range(int(lines[0])):
alph.append(lines[1 + i].strip())
return alph, lines[int(lines[0]) + 1].strip()
def regex2DFA(path):
'''
Computes the DFA of a regular expression
Args:
path: string, the path to the input file
Returns:
None
'''
# 1. Reading the input
ALPH, INPUT = read_input(path)
# 2. Getting the tokens
tokens = create_token_queue(INPUT)
# 3. Converting the tokens to post-order format
post = create_postfix_token_queue(tokens)
# 4. Creating the tree
t = Tree(post)
# 5. Creating the DFA
d = DFA(ALPH, t)
# 6. Printing the results
print(INPUT)
print(t)
print(d)
regex2DFA('Inputs\\Input4.txt')