In [None]:
from IPython.core.display import HTML
# Read the stylesheet that lives one directory up and hand it to IPython's
# HTML wrapper; as the last expression of the cell it becomes the cell output.
with open("../style.css", "r") as stylesheet:
    css = stylesheet.read()
HTML(css)

In [None]:
%load_ext nb_mypy

# A Shift-Reduce Parser for Arithmetic Expressions

In this notebook we implement a simple *shift-reduce* parser for arithmetic expressions.
This parser will implement the following grammar:
$$
  \begin{eqnarray*}
  \mathrm{expr}        & \rightarrow & \mathrm{expr}\;\;\texttt{'+'}\;\;\mathrm{product}   \\
                       & \mid        & \mathrm{expr}\;\;\texttt{'-'}\;\;\mathrm{product}   \\
                       & \mid        & \mathrm{product}                                    \\[0.2cm]
  \mathrm{product}     & \rightarrow & \mathrm{product}\;\;\texttt{'*'}\;\;\mathrm{factor} \\
                       & \mid        & \mathrm{product}\;\;\texttt{'/'}\;\;\mathrm{factor} \\
                       & \mid        & \mathrm{factor}                                     \\[0.2cm]
  \mathrm{factor}      & \rightarrow & \texttt{'('} \;\;\mathrm{expr} \;\;\texttt{')'}     \\
                       & \mid        & \texttt{NUMBER} 
  \end{eqnarray*}
$$

In [None]:
import re

The function `tokenize` transforms the string `s` into a list of tokens. See below for an example.

In [None]:
def tokenize(s: str) -> list[str]:
    '''Split the arithmetic expression s into a list of tokens.

    Whitespace (blanks, tabs, newlines) is dropped.  Every number is
    replaced by the generic token 'NUMBER', the operators and parentheses
    "-+*/()" are kept as they are, and any other character c is reported
    as the token 'ERROR(c)'.
    '''
    token_spec = r'''([ \t\n]+)      |  # blanks and tabs
                  ([1-9][0-9]*|0) |  # number
                  ([-+*/()])      |  # arithmetical operators
                  (.)                # unrecognized character
               '''
    tokens: list[str] = []
    # findall yields one 4-tuple per match; exactly one component is non-empty.
    for blank, digits, symbol, unknown in re.findall(token_spec, s, re.VERBOSE):
        if blank:
            continue  # whitespace carries no information for the parser
        if digits:
            tokens.append('NUMBER')
        elif symbol:
            tokens.append(symbol)
        else:
            tokens.append(f'ERROR({unknown})')
    return tokens

In [None]:
# Expected value: ['NUMBER', '+', 'NUMBER', '*', '(', 'NUMBER', '-', 'NUMBER', ')']
tokenize('1 + 2 * (3 - 4)')

In [None]:
# A grammar rule is a pair (head, body); the parser unpacks it as
# `head, body = rule` and joins the strings in `body` for display.
Rule       = tuple[str, tuple[str, ...]]
# An entry of the action table as matched by the parser: a plain string
# ('accept' or 'error'), a pair ('shift', state) or a pair ('reduce', rule).
Action     = str | tuple[str, str] | tuple[str, Rule]
# The three lines below are annotations only, no value is assigned here.
# Presumably the tables are defined when Parse-Table.ipynb is run further
# down in this notebook -- TODO confirm.
# actionTable is looked up with (state, token), gotoTable with (state, head),
# stateTable with a state name; its string set is only printed by the parser.
actionTable: dict[tuple[str, str], Action]
gotoTable  : dict[tuple[str, str], str]
stateTable : dict[str, set[str]]

In [None]:
class ShiftReduceParser:
    '''A table-driven shift-reduce parser.

    The object merely stores the three tables it is given.  The method
    `parse` defined in this class is a placeholder; the working
    implementation is assigned to `ShiftReduceParser.parse` in a later
    cell of this notebook.
    '''

    def __init__(self,
                 actionTable: dict[tuple[str, str], Action],
                 gotoTable  : dict[tuple[str, str], str],
                 stateTable : dict[str, set[str]]) -> None:
        '''Store the action table, the goto table and the state table.'''
        self.mStateTable  = stateTable
        self.mGotoTable   = gotoTable
        self.mActionTable = actionTable

    def parse(self, TL: list[str]) -> bool:
        '''Placeholder that returns None until the real method is attached.'''
        return None # type: ignore

In [None]:
def parse(self, TL: list[str]) -> bool:
    '''Run the shift-reduce algorithm on the token list TL and trace every step.

    The parser keeps a stack of symbols and a stack of states that starts
    with the state 's0'.  In every step the action for the pair
    (current state, next token) is looked up in self.mActionTable; a missing
    entry is treated as a syntax error.

    Parameters:
        TL: the list of tokens to parse.  The list is not modified; the end
            marker 'EOF' is appended to a private copy.

    Returns:
        True if the action 'accept' is reached, False on a syntax error.

    Raises:
        ValueError: if an action table entry is neither 'error', 'accept',
                    ('shift', state) nor ('reduce', rule).
        KeyError:   if the goto table or the state table lacks a required entry.
    '''
    index   = 0                 # points to next token
    Symbols: list[str] = []     # stack of symbols
    States : list[str] = ['s0'] # stack of states, s0 is start state
    # Work on a copy: `TL += [...]` would extend the caller's list in place.
    tokens  = TL + ['EOF']
    while True:
        q = States[-1]
        t = tokens[index]
        print(f'States:  [ {", ".join(States)} ]')
        print('Symbols:', ' '.join(Symbols + ['|'] + tokens[index:]).strip())
        print('State:   {', ", ".join(self.mStateTable[q]), '}')
        match self.mActionTable.get((q, t), 'error'):
            case 'error':
                print(f'Action({q}, {t}) undefined.')
                print('Syntax error!\n')
                return False
            case 'accept':
                print('Accepting!\n')
                return True
            case 'shift', s:
                print(f'Shifting state {s}')
                print('State:   {', ', '.join(self.mStateTable[s]), '}\n')
                Symbols.append(t)
                States.append(s)
                index += 1
            case 'reduce', rule:
                head, body = rule
                print(f'Reducing with rule {head} → {" ".join(body)}')
                n = len(body)
                # Pop n entries from both stacks.  The start index is computed
                # as len - n because a slice ending at -n would denote the
                # empty list for n == 0 and thereby clear the stacks when an
                # empty rule body is reduced.
                del Symbols[len(Symbols) - n:]
                del States [len(States)  - n:]
                Symbols.append(head)
                target = self.mGotoTable[States[-1], head]
                States.append(target)
                print('State:   {', ', '.join(self.mStateTable[target]), '}\n')
            case unknown:
                # Without this branch a malformed entry would change nothing
                # and the loop would never terminate.
                raise ValueError(f'Malformed action for ({q}, {t}): {unknown!r}')
            
# Install the function defined above as the method `parse` of the class
# ShiftReduceParser, replacing the placeholder from the class definition.
ShiftReduceParser.parse = parse # type: ignore
# Remove the module-level name so that the function is reachable only as a method.
del parse

In [None]:
%run Parse-Table.ipynb

## Testing

In [None]:
def test(s: str) -> None:
    '''Tokenize the string s, parse the resulting token list and report the outcome.

    A parser is built from the global tables actionTable, gotoTable and
    stateTable.  The token list is printed before parsing; afterwards a
    single line states whether the parse succeeded.
    '''
    parser = ShiftReduceParser(actionTable, gotoTable, stateTable)
    tokens = tokenize(s)
    print(f'tokenlist: {tokens}\n')
    accepted = parser.parse(tokens)
    print('Parse successful!' if accepted else 'Parse failed!')

In [None]:
# Trace the parse of an expression that mixes '+' and '*'.
# NOTE(review): the outcome depends on the tables, which are presumably
# loaded from Parse-Table.ipynb -- confirm.
test('1 + 2 * 3')

In [None]:
# Trace the parse of an expression that additionally contains parentheses.
# NOTE(review): the outcome depends on the tables, which are presumably
# loaded from Parse-Table.ipynb -- confirm.
test('1 + 2 * (3 - 4)')