#### Recursive Descent Parsing
CS 236 <br>
Fall 2023

Michael A. Goodrich <br>
Brigham Young University <br>
February 2023
***

Consider the following LL(1) Grammar
* $E \rightarrow N \ | \ OEE$
* $O \rightarrow +\ |\ *$ 
* $N \rightarrow 0\ |\ 1\ |\ 2\ |\ 3$

The starting non-terminal is $E$

The first sets are
* $FIRST(OEE) = FIRST(O) = \{+,*\}$
* $FIRST(N) = \{0,1,2,3\}$

***

Let's construct a class for the recursive descent parser (RDP). There will be a function for each nonterminal and a function to test whether the current input character what the grammar expects. We'll call this testing function ___current_input_matches_target_.

***

In [11]:
from typing import Callable, Optional
class RDP:  # RDP stands for recursive descent parser
    def __init__(self) -> None:
        ###################################
        # Tuple defining an LL(1) grammar #
        # • set of nonterminals           #
        # • set of terminals              #
        # • starting nonterminal          #
        # • set of productions            #
        ###################################
        self.nonterminals: set[str] = {'E', 'O', 'N'}  # set of nonterminals. Each nonterminal will have its own function
        self.starting_nonterminal: Callable[[], None] = self.e  # Starting nonterminal
        self.terminals: set[str] = {'+', '*', '0', '1', '2', '3'}  # set of terminals
        # Productions                               # Defined within the nonterminal functions

        ##########################################
        # Define FIRST sets for each nonterminal #
        ##########################################
        self.first: dict[str, set[str]] = dict()
        self.first['O'] = {'+', '-'}
        self.first['N'] = {'0', '1', '2', '3'}

        ###########################################
        # Variables for Managing the input string #
        ###########################################
        self.input: Optional[str] = None
        self.num_chars_read: int = 0

        ###########################################
        # Variables for printing the trace        #
        ###########################################
        self.tree_depth: int = 0

    def parse_input(self, input_: str) -> None:
        """ Call this function from main.
            It gets the input, calls the starting non-terminal,
            and does the accounting to see if the parse was successful
        """
        print("Parsing string", input_)
        self.input = input_
        self.starting_nonterminal()  # Run the RDP by calling the starting nonterminal
        if self.num_chars_read == len(self.input):
            print("Successfully parsed string")
        else:
            raise ValueError(
                "End of parse and these characters haven't been read: " + str(self.input[self.num_chars_read:]))

    ##############################################################
    # Each nonterminal gets its own function.                      #
    # The function knows which productions have the nonterminal on #
    # the left hand side of the production. The correct right    #
    # hand side of the production is chosen by looking at the    #
    # current input and the FIRST set of the right hand side     #
    ##############################################################
    def e(self) -> None:
        # Production E--> N | OEE
        self.__print_entry_message("E")
        print(self.__get_tab_string(), "Trying to read input", self.__get_current_input())
        current_input = self.__get_current_input()
        if current_input in self.first['N']:
            self.n()
        elif current_input in self.first['O']:
            self.o()
            self.e()
            self.e()
        else:  # error
            raise ValueError("Current input is " + str(current_input) + ", which cannot be produced by 'E'")
        self.__print_exit_message("E")

    def n(self) -> None:
        # Production N --> 0 | 1 | 2 | 3
        self.__print_entry_message("N")
        print(self.__get_tab_string(), "Trying to read input", self.__get_current_input())
        if self.__current_input_matches_target('0') or \
                self.__current_input_matches_target('1') or \
                self.__current_input_matches_target('2') or \
                self.__current_input_matches_target('3'):
            self.__advance_input()  # move to the next current input character
            pass
        else:
            raise ValueError("Current input is " + str(self.__get_current_input()) + ", which")

    def o(self) -> None:
        # Production O --> + | *
        self.__print_entry_message("O")
        print(self.__get_tab_string(), "Trying to read input", self.__get_current_input())
        if self.__current_input_matches_target('+') or \
                self.__current_input_matches_target('*'):
            self.__advance_input()  # move to the next current input character
            pass
        else:
            raise ValueError("Current input is " + str(self.__get_current_input()) + ", which")

    ############################################################################
    # Helper functions for managing the input                                    #
    # One looks at the current input                                           #
    # Another reads the input and advances to the next input                   #
    # A third looks to see if the current input character matches a target     #
    # Convention in python is to prefix private functions by a double underscore #
    # https://www.geeksforgeeks.org/private-functions-in-python/                 #
    ############################################################################
    def __get_current_input(self) -> str:
        if self.num_chars_read == len(self.input):
            raise ValueError("Expected to read another input character but no inputs left to read")
        return self.input[self.num_chars_read]

    def __advance_input(self) -> None:
        if self.num_chars_read == len(self.input):
            raise ValueError("Expected to advance to next input character but no inputs left to read")
        self.num_chars_read += 1

    def __current_input_matches_target(self, target_input: str) -> bool:
        return self.__get_current_input() == target_input

    ########################
    # Other public functions #
    ########################
    def reset(self) -> None:
        self.num_chars_read = 0
        self.input = ""

    ###############################
    # Parse tree printing functions #
    ###############################
    def __print_entry_message(self, function_name: str) -> None:
        print(self.__get_tab_string(), "In", function_name, "function.")
        self.tree_depth += 1

    def __print_exit_message(self, function_name: str) -> None:
        self.tree_depth -= 1
        print(self.__get_tab_string(), "Returning from", function_name, ".")

    def __get_tab_string(self) -> str:
        tab_string = ""
        for d in range(self.tree_depth):
            tab_string += "\t"
        return tab_string



In [12]:
my_rdp: RDP = RDP()
try:
    my_rdp.parse_input('+12')
except ValueError as inst:
    message: tuple[str] = inst.args
    print(message)



Parsing string +12
 In E function.
	 Trying to read input +
	 In O function.
		 Trying to read input +
		 In E function.
			 Trying to read input 1
			 In N function.
				 Trying to read input 1
			 Returning from E .
			 In E function.
				 Trying to read input 2
				 In N function.
					 Trying to read input 2
				 Returning from E .
			 Returning from E .
Successfully parsed string


In [13]:
my_rdp.reset()
try:
    my_rdp.parse_input('+123')
except ValueError as inst:
    message: tuple[str] = inst.args
    print(message)


Parsing string +123
			 In E function.
				 Trying to read input +
				 In O function.
					 Trying to read input +
					 In E function.
						 Trying to read input 1
						 In N function.
							 Trying to read input 1
						 Returning from E .
						 In E function.
							 Trying to read input 2
							 In N function.
								 Trying to read input 2
							 Returning from E .
						 Returning from E .
("End of parse and these characters haven't been read: 3",)


In [14]:
my_rdp.reset()
try:
    my_rdp.parse_input('+1')
except ValueError as inst:
    message: tuple[str] = inst.args
    print(message)

Parsing string +1
						 In E function.
							 Trying to read input +
							 In O function.
								 Trying to read input +
								 In E function.
									 Trying to read input 1
									 In N function.
										 Trying to read input 1
									 Returning from E .
									 In E function.
('Expected to read another input character but no inputs left to read',)


In [15]:
my_rdp.reset()
try:
    my_rdp.parse_input('+1A')
except ValueError as inst:
    message: tuple[str] = inst.args
    print(message)


Parsing string +1A
										 In E function.
											 Trying to read input +
											 In O function.
												 Trying to read input +
												 In E function.
													 Trying to read input 1
													 In N function.
														 Trying to read input 1
													 Returning from E .
													 In E function.
														 Trying to read input A
("Current input is A, which cannot be produced by 'E'",)


In [16]:
my_rdp.reset()
try:
    my_rdp.parse_input('-31')
except ValueError as inst:
    message: tuple[str] = inst.args
    print(message)

Parsing string -31
														 In E function.
															 Trying to read input -
															 In O function.
																 Trying to read input -
('Current input is -, which',)
