### CMPN403 Programming Assignment
#### Regex to Minimized DFA Conversion
Team:
- Ahmed Tarek Abdellatif 1190157
- Mostafa Mohammad Mahmoud El-Nashar 1190212

#### State and FSM Classes
##### FSM reads/writes JSON, performs Thompson's construction algorithm, outputs image

In [10]:
import json
# from typing import Optional


class State:
    """
    One node of a finite state machine.

    name               : identifier of the state
    transitions        : {input : [names of the states reachable on that input]};
                         the FSM class stores its epsilon moves under the "" key
    isTerminatingState : True when the state is an accepting state
    """

    def __init__(self, name: str = "", transitions: dict[str, list[str]] = None, isTerminatingState: bool = False):
        self.name: str = name
        # A fresh dict is created per instance when none is supplied,
        # so states never share one transition table by accident.
        self.transitions: dict[str, list[str]] = dict() if transitions is None else transitions
        self.isTerminatingState: bool = isTerminatingState

    def __iter__(self):
        """
        Yields (key, value) pairs so that dict(state) gives the JSON layout:
        the '@isTerminatingState' flag first, then every transition entry.
        """
        yield ('@isTerminatingState', self.isTerminatingState)
        yield from self.transitions.items()


class FSM:
    """
    Finite state machine with epsilon moves, combined with Thompson-style constructions.

    nonTerminalStates : {state_name : State} for the non-accepting states
    terminalStates    : {state_name : State} for the accepting states
    startingState     : the State the machine starts in

    Epsilon moves are stored in a state's transitions under the "" key.
    Every combining method works ***in place*** and returns self so calls can be chained.
    """

    # Shared by all FSMs so generated names never collide; starting at -1 makes the first name "Q0"
    stateCounter = -1

    @staticmethod
    def __getNewStateName():
        """
        Returns the next unused state name ("Q0", "Q1", ...)
        """
        FSM.stateCounter += 1
        return "Q" + str(FSM.stateCounter)

    def __init__(self, literal: str = None):
        """
        *** Doesn't validate the literal string ***

        Creates an empty FSM, or, when a literal is given, an FSM accepting exactly that string
        """
        self.nonTerminalStates: dict[str, State] = dict()  # {state_name : state}
        self.terminalStates: dict[str, State] = dict()  # {state_name : state}
        self.startingState: State = None
        if literal is not None:
            self.__literal_to_fsm(literal)

    def __literal_to_fsm(self, literal: str):
        """
        *** Doesn't validate the literal string ***

        Initializes the FSM to describe a literal string:
        a chain of states with one transition per character
        """
        if literal == "":
            # The empty string is accepted by a single state that is both starting and terminating
            s = State(FSM.__getNewStateName(), isTerminatingState=True)
            self.startingState = s
            self.terminalStates[s.name] = s
            return
        # Non-empty string
        older_state_name = starting_state_name = FSM.__getNewStateName()
        for char in literal:
            new_state_name = FSM.__getNewStateName()
            self.nonTerminalStates[older_state_name] = State(
                older_state_name, {char: [new_state_name]})
            older_state_name = new_state_name
        # The last state of the chain is the only terminating one
        self.terminalStates[older_state_name] = State(older_state_name, isTerminatingState=True)
        self.startingState = self.nonTerminalStates[starting_state_name]

    @staticmethod
    def read_JSON(JSONFILE: str) -> "FSM":
        """
        Takes JSON file name as string and reads an fsm from it

        Expected layout: {"@startingState": name, state_name: {"@isTerminatingState": bool, input: [state names]}}
        Raises KeyError when "@startingState" or a state's "@isTerminatingState" is missing
        """
        fsm = FSM()
        with open(JSONFILE) as f:
            parsed_json: dict[str, str | dict[str, bool | dict[str, list[str]]]] = json.load(f)
        starting_state_name = parsed_json.pop("@startingState")
        for name, transitions in parsed_json.items():
            # Whatever remains after removing the flag is the transition table itself
            terminal = transitions.pop("@isTerminatingState")
            s = State(name, transitions, terminal)
            if name == starting_state_name:
                fsm.startingState = s
            if terminal:
                fsm.terminalStates[name] = s
            else:
                fsm.nonTerminalStates[name] = s
        return fsm

    def write_JSON(self, JSONFILE: str):
        """
        Takes JSON file name as string and writes the fsm to it
        """
        # Build the payload first so a failure here does not leave a truncated file behind
        data = {'@startingState': self.startingState.name}
        data.update({s.name: dict(s) for s in self.nonTerminalStates.values()})
        data.update({s.name: dict(s) for s in self.terminalStates.values()})
        with open(JSONFILE, "w") as f:
            json.dump(data, f, indent=4, sort_keys=True)

    @staticmethod
    def _add_epsilon_moves(state, targets):
        """
        Adds epsilon moves from the given state to every state name in targets,
        keeping any epsilon moves the state already has
        """
        existing = state.transitions.get("")
        if existing:
            existing.extend(targets)
        else:
            state.transitions[""] = list(targets)

    def _wrap(self, *, skip: bool, loop: bool) -> "FSM":
        """
        Surrounds the current FSM with a new starting state and a new terminating state ***in place***

        skip : also add an epsilon move from the new starting state to the new terminating state
        loop : also add an epsilon move from the old terminal states back to the new starting state
        """
        new_starting_state = State(FSM.__getNewStateName())
        new_terminating_state = State(FSM.__getNewStateName(), isTerminatingState=True)

        # Mapping epsilon moves from the new starting state
        entry_targets = [self.startingState.name]
        if skip:
            entry_targets.append(new_terminating_state.name)
        new_starting_state.transitions[""] = entry_targets

        past_terminal_states = self.terminalStates
        # Adding the old terminal states to the non terminal states
        self.nonTerminalStates.update(past_terminal_states)
        self.nonTerminalStates[new_starting_state.name] = new_starting_state
        # Updating the starting state to the new one
        self.startingState = new_starting_state
        # Updating the terminal states to the new one
        self.terminalStates = {new_terminating_state.name: new_terminating_state}

        # Mapping epsilon moves from the old terminal states
        exit_targets = [new_terminating_state.name]
        if loop:
            exit_targets.append(new_starting_state.name)
        for state in past_terminal_states.values():
            state.isTerminatingState = False
            FSM._add_epsilon_moves(state, exit_targets)
        return self

    def Concatenate(self, fsm: "FSM") -> "FSM":
        """
        Concatenates the current FSM with another FSM ***in place***

        i.e ( self . fsm )
        """
        past_terminal_states = self.terminalStates
        # Adding the old terminal states to the non terminal states
        self.nonTerminalStates.update(past_terminal_states)
        # Adding the new non terminal states to the non terminal states
        self.nonTerminalStates.update(fsm.nonTerminalStates)
        # Updating the terminal states to the new ones (copied so the two FSMs don't share one dict)
        self.terminalStates = dict(fsm.terminalStates)
        # Mapping epsilon moves from the old terminal states to the other fsm's starting state
        for state in past_terminal_states.values():
            state.isTerminatingState = False
            FSM._add_epsilon_moves(state, [fsm.startingState.name])
        return self

    def Alternate(self, fsm: "FSM") -> "FSM":
        """
        Alternates the current FSM with another FSM ***in place***

        i.e ( self | fsm )
        """
        new_starting_state = State(FSM.__getNewStateName())
        new_terminating_state = State(FSM.__getNewStateName(), isTerminatingState=True)

        # Mapping epsilon moves from new starting state to old starting states
        new_starting_state.transitions[""] = [self.startingState.name, fsm.startingState.name]

        # Merged into a new dict so neither FSM's own terminal table is modified
        past_terminal_states = {**self.terminalStates, **fsm.terminalStates}
        # Adding the old terminal states to the non terminal states
        self.nonTerminalStates.update(past_terminal_states)
        # Adding the other fsm's non terminal states to the non terminal states
        self.nonTerminalStates.update(fsm.nonTerminalStates)
        self.nonTerminalStates[new_starting_state.name] = new_starting_state
        # Updating the starting state to the new one
        self.startingState = new_starting_state
        # Updating the terminal states to the new one
        self.terminalStates = {new_terminating_state.name: new_terminating_state}

        # Mapping epsilon moves from old terminal states to new terminal state
        for state in past_terminal_states.values():
            state.isTerminatingState = False
            FSM._add_epsilon_moves(state, [new_terminating_state.name])
        return self

    def Zero_Or_More(self) -> "FSM":
        """
        Applies Kleene star to the current FSM ***in place***

        i.e ( self* )
        """
        return self._wrap(skip=True, loop=True)

    def One_or_More(self) -> "FSM":
        """
        Applies Kleene plus to the current FSM ***in place***

        i.e ( self+ )
        """
        return self._wrap(skip=False, loop=True)

    def Optional(self) -> "FSM":
        """
        Applies the question mark operator to the current FSM ***in place***

        i.e ( self? )
        """
        return self._wrap(skip=True, loop=False)


#### Testing FSM class

In [11]:
# Smoke tests: build small machines with each operation and dump them to JSON.
# The state counter is never reset here, so state names keep increasing from one line to the next.
# ab | cd
FSM('ab').Alternate(FSM('cd')).write_JSON('ab or cd.json')
# ab followed by cd
FSM('ab').Concatenate(FSM('cd')).write_JSON('abcd.json')
# (ab)*
FSM('ab').Zero_Or_More().write_JSON('abstar.json')
# (ab)+
FSM('ab').One_or_More().write_JSON('abplus.json')
# (ab)?
FSM('ab').Optional().write_JSON('aboptional.json')
# (ab | cd)*
FSM('ab').Alternate(FSM('cd')).Zero_Or_More().write_JSON('_ab or cd_star.json')
# Round trip: read a file written above and write it out again (to a file literally named '.json')
FSM.read_JSON("ab or cd.json").write_JSON('.json')

#### String to NFA

In [17]:
def string2NFA(regex: str) -> "FSM":
    """
    Converts a regex string to an NFA

    Supported syntax:
        literals    any character that is not one of ( ) | * + ?
        grouping    ( ... )
        quantifiers * + ?   (bind to the single character or group just before them)
        alternation |       (lowest precedence)

    Character classes are not supported: '[' and ']' are treated as literal characters.
    An empty regex or an empty alternative matches the empty string.
    A quantifier with nothing before it is applied to the empty string.

    Raises ValueError on an unclosed '(' or an unmatched ')'
    """
    # TODO: Add support for character classes
    fsm, position = _parse_alternation(regex, 0)
    if position != len(regex):
        # The parser only stops early on a ')' that no '(' is waiting for
        raise ValueError(f"unmatched ')' at index {position} in regex {regex!r}")
    return fsm


_QUANTIFIERS = "*+?"
_SPECIAL_CHARACTERS = "()|" + _QUANTIFIERS


def _parse_alternation(regex: str, position: int):
    """
    Parses `sequence ('|' sequence)*` starting at position; returns (fsm, next position)
    """
    fsm, position = _parse_sequence(regex, position)
    while position < len(regex) and regex[position] == '|':
        alternative, position = _parse_sequence(regex, position + 1)
        fsm = fsm.Alternate(alternative)
    return fsm, position


def _parse_sequence(regex: str, position: int):
    """
    Parses consecutive quantified items up to '|', ')' or the end; returns (fsm, next position)
    """
    fsm = None
    while position < len(regex) and regex[position] not in "|)":
        item, position = _parse_quantified(regex, position)
        fsm = item if fsm is None else fsm.Concatenate(item)
    if fsm is None:
        # Nothing between the delimiters: matches the empty string
        fsm = FSM("")
    return fsm, position


def _parse_quantified(regex: str, position: int):
    """
    Parses one atom followed by any number of quantifiers; returns (fsm, next position)
    """
    fsm, position = _parse_atom(regex, position)
    while position < len(regex) and regex[position] in _QUANTIFIERS:
        quantifier = regex[position]
        if quantifier == '*':
            fsm = fsm.Zero_Or_More()
        elif quantifier == '+':
            fsm = fsm.One_or_More()
        else:
            fsm = fsm.Optional()
        position += 1
    return fsm, position


def _parse_atom(regex: str, position: int):
    """
    Parses a bracketed group or a run of literal characters; returns (fsm, next position)
    """
    if regex[position] == '(':
        open_bracket_index = position
        fsm, position = _parse_alternation(regex, position + 1)
        if position >= len(regex):
            raise ValueError(f"unclosed '(' at index {open_bracket_index} in regex {regex!r}")
        # _parse_alternation only stops before the end on ')', so skip over it
        return fsm, position + 1
    if regex[position] in _QUANTIFIERS:
        # Quantifier with no operand: hand back the empty string and let the caller consume the quantifier
        return FSM(""), position
    end = position + 1
    while end < len(regex) and regex[end] not in _SPECIAL_CHARACTERS:
        end += 1
    if end < len(regex) and regex[end] in _QUANTIFIERS and end - position > 1:
        # A quantifier binds only to the last character, so leave that character for the next atom
        end -= 1
    return FSM(regex[position:end]), end

In [25]:
# Reset the shared counter so the generated state names start again from Q0
FSM.stateCounter = -1
# Build the NFA for the sample regex and dump it to test.json
string2NFA('ab*|c').write_JSON('test.json')

In [26]:
import re


def verify_regex(regex):
    """
    Checks whether a regex string is acceptable and prints the verdict.

    A regex is rejected when Python's `re` module cannot compile it, or when it
    starts with one or more '/' characters or ends with '//'.

    Returns True when the regex is accepted and False when it is rejected, so
    callers can act on the result instead of only reading the printed message.
    """
    try:
        re.compile(regex)
    except re.error:
        print("regex Rejected")
        return False
    # Leading slashes or a trailing double slash are refused even though `re` accepts them
    if re.search(r"(^\/{1,})|(\/{2})$", regex):
        print("regex rejected")
        return False
    print("SUCCESS")
    return True


# Ask the user for a regex and print whether it passes verification
x = input("Enter a regex to verify : ")
verify_regex(x)


SUCCESS
