In [37]:
#from IPython.display import JSON
from subprocess import getoutput
import os
import re 
import json

In [17]:
def ValidateRegex(RegexInput):
  """Check that ``RegexInput`` is accepted by ``re.compile``.

  Returns:
    True when the pattern compiles.

  When ``re.compile`` raises ``re.error`` the message
  "Non valid regex pattern" is printed and ``exit()`` is called.
  """
  try:
    re.compile(RegexInput)
  except re.error:
    # The pattern was rejected: report it and stop.
    print("Non valid regex pattern")
    exit()
  else:
    return True

In [4]:
def ExtractAtomicExpressions(regex):
    """Split a parenthesised regex into its groups, innermost first.

    Characters are pushed on a stack one at a time. Each ")" closes a group:
    the characters pushed since the most recent "(" are popped, put back in
    their original order and appended to the result. A closed group is
    removed from the stack, so the text collected for an enclosing group does
    not contain the text of the groups nested inside it.

    An unmatched ")" is tolerated: when no "(" is left on the stack,
    everything still on the stack becomes the atomic expression.

    Args:
        regex: The expression to scan, iterated character by character.

    Returns:
        A list with one string per ")" in the order they were encountered.
        Characters that are never followed by a closing ")" are not returned.
    """
    stack = []
    atomics = []
    for c in regex:
        if c != ")":
            stack.append(c)
            continue

        # Test for an empty stack *before* peeking at its top so that an
        # unmatched ")" cannot index an empty list.
        popped = []
        while stack and stack[-1] != "(":
            popped.append(stack.pop())

        if stack:
            stack.pop()  # discard the "(" that opened this group

        # Characters came off the stack last-to-first; restore their order.
        atomics.append("".join(reversed(popped)))
    return atomics

In [5]:
class State:
    """A single automaton state.

    Attributes:
        stateID: Identifier of the state; ``None`` until the caller sets it.
        edges: List of outgoing edges owned by this state.
        transitions: List owned by this state.
    """

    def __init__(self):
        # The lists are created per instance. As class-level attributes they
        # would be a single object shared by every State, so appending an
        # edge to one state would add it to all of them.
        self.stateID = None
        self.edges = []
        self.transitions = []

In [6]:
class NFA:
    """An automaton described by its entry state and its accepting states."""

    # Class-level defaults, shadowed by the instance attributes set below.
    initial = None
    accept = None

    def __init__(self, initial, accept):
        """Store ``initial`` and ``accept`` on the instance.

        A falsy ``accept`` (for example ``None`` or an empty list) is
        replaced by a new empty list.
        """
        accepting = accept if accept else []
        self.initial, self.accept = initial, accepting

In [7]:
def InsertConcatenationDot(regex):
    """Make concatenation explicit by inserting "." between adjacent operands.

    A dot is inserted at every position whose left neighbour is a word
    character or ")" and whose right neighbour is a word character or "(".

    The substitution is zero-width (lookbehind + lookahead), so no character
    is consumed by a match and every qualifying position receives a dot.
    With consuming patterns a run such as "abc" only gets every other dot
    ("a.bc"), because the "b" used by the first match cannot start the next.

    NOTE(review): only word characters and parentheses are considered. No dot
    is added after "*", "+" or "?" or next to "[" / "]", and word characters
    inside a bracket class such as "[abc]" are separated as well - confirm
    whether callers need those cases.

    Args:
        regex: The regular expression text.

    Returns:
        The text with "." inserted at every implicit concatenation found.
    """
    return re.sub(r'(?<=[\w)])(?=[\w(])', '.', regex)




In [19]:
def MoveSymbolToEndOfAtomicExpression(expression):
    """Relocate the first "|" or "." found in ``expression`` to its end.

    Only the earliest occurrence of either symbol is moved; all other
    characters keep their relative order. When neither symbol occurs the
    argument is returned as is.
    """
    hit = next(
        (
            pos
            for pos, symbol in enumerate(expression)
            if symbol == "|" or symbol == "."
        ),
        None,
    )
    if hit is None:
        return expression

    operator = "|" if expression[hit] == "|" else "."
    return expression[:hit] + expression[hit + 1:] + operator
            

In [163]:
def RegexToNFA(regex):
    """Build an NFA for ``regex`` from its parenthesised groups.

    Pipeline:
      1. ``ValidateRegex`` checks the pattern.
      2. Spaces are removed and ``InsertConcatenationDot`` adds explicit ".".
      3. ``ExtractAtomicExpressions`` yields the groups, and
         ``MoveSymbolToEndOfAtomicExpression`` moves the first "|" or "." of
         each group to its end.
      4. Every group is read token by token against one stack of NFA
         fragments that is shared by all groups. A token is either a whole
         bracket class ("[...]") or a single character. ".", "|", "*", "?"
         and "+" combine fragments from the stack; any other token becomes a
         two-state fragment with one edge labelled by the token.

    NOTE(review): fragments of a nested group are pushed before the operands
    of the group that encloses it, so "." combines them in group-closing
    order rather than textual order (for "((a(b*))c)" the "b*" fragment is
    on the stack before "a"). Confirm this is the intended construction.

    Args:
        regex: The regular expression text.

    Returns:
        The NFA fragment on top of the stack once all groups are processed,
        or ``None`` when validation does not succeed or no fragment was
        built.

    Raises:
        IndexError: when an operator is reached with too few fragments on
            the stack.
    """
    # Without this guard the stack below would never be created and the
    # final lookup would fail with an unbound local.
    if not ValidateRegex(regex):
        return None

    # Strip spaces first so operands separated only by a space still receive
    # a concatenation dot.
    regex = InsertConcatenationDot(regex.replace(" ", ""))
    atomics = [
        MoveSymbolToEndOfAtomicExpression(atomic)
        for atomic in ExtractAtomicExpressions(regex)
    ]

    counter = -1

    def new_state():
        """Create a state with the next sequential id and its own edge list."""
        nonlocal counter
        counter += 1
        state = State()
        state.stateID = counter
        # Assign a fresh list so edges are never shared between states,
        # however State itself initialises the attribute.
        state.edges = []
        return state

    def link(sources, label, target):
        """Add an edge ``label -> target`` to every state in ``sources``."""
        for source in sources:
            source.edges.append((label, target))

    stack = []
    for atomic in atomics:
        # Keep a bracket class together as one token; everything else is read
        # one character at a time from the current group (not from the whole
        # regex).
        for token in re.findall(r'\[.*?\]|.', atomic, flags=re.DOTALL):
            if token == '.':
                second = stack.pop()
                first = stack.pop()
                link(first.accept, "ε", second.initial)
                first.accept = second.accept
                stack.append(first)
            elif token == '|':
                second = stack.pop()
                first = stack.pop()
                start = new_state()
                start.edges.append(("ε", first.initial))
                start.edges.append(("ε", second.initial))
                stack.append(NFA(start, first.accept + second.accept))
            elif token == '*':
                inner = stack.pop()
                start = new_state()
                end = new_state()
                start.edges.append(("ε", inner.initial))
                start.edges.append(("ε", end))
                link(inner.accept, "ε", start)
                link(inner.accept, "ε", end)
                stack.append(NFA(start, [end]))
            elif token == '?':
                inner = stack.pop()
                start = new_state()
                end = new_state()
                start.edges.append(("ε", inner.initial))
                start.edges.append(("ε", end))
                link(inner.accept, "ε", end)
                stack.append(NFA(start, [end]))
            elif token == '+':
                inner = stack.pop()
                start = new_state()
                end = new_state()
                start.edges.append(("ε", inner.initial))
                link(inner.accept, "ε", start)
                link(inner.accept, "ε", end)
                end.edges.append(("ε", start))
                stack.append(NFA(start, [end]))
            else:
                # Literal character or bracket class: one labelled edge.
                start = new_state()
                end = new_state()
                start.edges.append((token, end))
                stack.append(NFA(start, [end]))

    return stack.pop() if stack else None


In [164]:
def WriteJsonFile(NFA):
    """Serialise an NFA to ``NFA.json`` and return the written dictionary.

    Layout of the dictionary:
      * "startingState": name of the initial state ("S" + its stateID).
      * one entry per state name holding "isTerminatingState" (the string
        "true" or "false") plus one key per edge label.

    An edge label maps to the destination name as a string. When a state has
    several edges with the same label (for example two "ε" edges) the value
    is a list of the distinct destination names, so no edge is lost.

    States are discovered breadth-first from ``NFA.initial``. A reached state
    gets a non-terminating entry only if it has outgoing edges; every state
    in ``NFA.accept`` then gets (or replaces its entry with) a terminating
    one.

    Args:
        NFA: Object exposing ``initial`` (a state) and ``accept`` (an
            iterable of states). States expose ``stateID`` and ``edges``,
            an iterable of ``(label, destination_state)`` pairs.

    Returns:
        The dictionary that was written to ``NFA.json`` in the current
        working directory.
    """

    def state_name(state):
        return "S" + str(state.stateID)

    def state_entry(state, terminating):
        """Build the JSON entry of one state, merging equal edge labels."""
        transitions = {}
        for label, target in state.edges:
            key = str(label)
            name = state_name(target)
            if key not in transitions:
                transitions[key] = name
            elif isinstance(transitions[key], list):
                if name not in transitions[key]:
                    transitions[key].append(name)
            elif transitions[key] != name:
                # Second distinct destination for this label: switch to a list.
                transitions[key] = [transitions[key], name]
        entry = {"isTerminatingState": terminating}
        entry.update(transitions)
        return entry

    data = {"startingState": state_name(NFA.initial)}
    visited = set()
    queue = [NFA.initial]
    while queue:
        state = queue.pop(0)
        if state in visited:
            continue
        visited.add(state)
        if state.edges:
            data[state_name(state)] = state_entry(state, "false")
            queue.extend(target for _, target in state.edges)

    for accept_state in NFA.accept:
        data[state_name(accept_state)] = state_entry(accept_state, "true")

    # The context manager closes the file even if serialisation fails.
    with open('NFA.json', 'w') as f:
        json.dump(data, f)
    return data

In [165]:
# Alternative sample input with nested groups (kept for manual experiments):
#regex= "((a(b*))c)"
# Sample input: a single bracket character class wrapped in one group.
regex= "([a-c])"
# Build the NFA for the sample pattern.
nfa=RegexToNFA(regex)
# Writes NFA.json to the working directory; the returned dictionary is the
# value displayed as this cell's output.
WriteJsonFile(nfa)

{'startingState': 'S0',
 'S0': {'isTerminatingState': 'false', '[a-c]': 'S1'},
 'S1': {'isTerminatingState': 'true'}}